rdf-raptor 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,130 @@
1
module RDF::Raptor::FFI
  ##
  # A foreign-function interface (FFI) to `libraptor` 1.4.x.
  #
  # This module declares the C functions, variables, callbacks and pointer
  # typedefs used by the wrapper classes autoloaded below. Loading it binds
  # against the library named by `RDF::Raptor::LIBRAPTOR` and calls
  # `raptor_init` once.
  #
  # @see http://librdf.org/raptor/libraptor.html
  module V1
    autoload :IOStream,        'rdf/raptor/ffi/v1/iostream'
    autoload :IOStreamHandler, 'rdf/raptor/ffi/v1/iostream_handler'
    autoload :Parser,          'rdf/raptor/ffi/v1/parser'
    autoload :Serializer,      'rdf/raptor/ffi/v1/serializer'
    autoload :Statement,       'rdf/raptor/ffi/v1/statement'
    autoload :URI,             'rdf/raptor/ffi/v1/uri'

    extend ::FFI::Library
    ffi_lib RDF::Raptor::LIBRAPTOR

    # TODO: Ideally this would be an enum, but the JRuby FFI (as of
    # version 1.4.0) has problems with enums as part of structs:
    #   `Unknown field type: #<FFI::Enum> (ArgumentError)`
    RAPTOR_IDENTIFIER_TYPE_UNKNOWN   = 0
    RAPTOR_IDENTIFIER_TYPE_RESOURCE  = 1
    RAPTOR_IDENTIFIER_TYPE_ANONYMOUS = 2
    RAPTOR_IDENTIFIER_TYPE_LITERAL   = 5

    # @see http://librdf.org/raptor/api-1.4/tutorial-initialising-finishing.html
    attach_function :raptor_init, [], :void
    attach_function :raptor_finish, [], :void
    attach_function :raptor_alloc_memory, [:size_t], :pointer
    attach_function :raptor_calloc_memory, [:size_t, :size_t], :pointer
    attach_function :raptor_free_memory, [:pointer], :void

    # @see http://librdf.org/raptor/api-1.4/raptor-section-locator.html
    typedef :pointer, :raptor_locator
    attach_function :raptor_locator_line, [:raptor_locator], :int
    attach_function :raptor_locator_column, [:raptor_locator], :int
    attach_function :raptor_locator_byte, [:raptor_locator], :int

    # @see http://librdf.org/raptor/api-1.4/raptor-section-general.html
    attach_variable :raptor_version_major, :int
    attach_variable :raptor_version_minor, :int
    attach_variable :raptor_version_release, :int
    attach_variable :raptor_version_decimal, :int
    callback :raptor_message_handler, [:pointer, :raptor_locator, :string], :void

    # @see http://librdf.org/raptor/api-1.4/raptor-section-uri.html
    # NOTE(review): `raptor_uri_to_string` is bound with a `:string` return
    # type; if the C function hands back a caller-owned buffer, that buffer is
    # never freed from Ruby -- confirm against the libraptor documentation.
    typedef :pointer, :raptor_uri
    attach_function :raptor_new_uri, [:string], :raptor_uri
    attach_function :raptor_uri_copy, [:raptor_uri], :raptor_uri
    attach_function :raptor_uri_equals, [:raptor_uri, :raptor_uri], :int
    attach_function :raptor_uri_as_string, [:raptor_uri], :string
    attach_function :raptor_uri_to_string, [:raptor_uri], :string
    attach_function :raptor_uri_print, [:raptor_uri, :pointer], :void
    attach_function :raptor_free_uri, [:raptor_uri], :void

    # @see http://librdf.org/raptor/api-1.4/raptor-section-triples.html
    typedef :int, :raptor_identifier_type
    typedef :pointer, :raptor_identifier
    typedef :pointer, :raptor_statement
    attach_function :raptor_statement_compare, [:raptor_statement, :raptor_statement], :int
    attach_function :raptor_print_statement, [:raptor_statement, :pointer], :void
    attach_function :raptor_print_statement_as_ntriples, [:pointer, :pointer], :void
    attach_function :raptor_statement_part_as_string, [:pointer, :raptor_identifier_type, :raptor_uri, :pointer], :string

    # @see http://librdf.org/raptor/api-1.4/raptor-section-parser.html
    callback :raptor_statement_handler, [:pointer, :raptor_statement], :void
    typedef :pointer, :raptor_parser
    attach_function :raptor_new_parser, [:string], :raptor_parser
    attach_function :raptor_set_error_handler, [:raptor_parser, :pointer, :raptor_message_handler], :void
    attach_function :raptor_set_warning_handler, [:raptor_parser, :pointer, :raptor_message_handler], :void
    attach_function :raptor_set_statement_handler, [:raptor_parser, :pointer, :raptor_statement_handler], :void
    attach_function :raptor_parse_file, [:raptor_parser, :raptor_uri, :raptor_uri], :int
    attach_function :raptor_parse_file_stream, [:raptor_parser, :pointer, :string, :raptor_uri], :int
    attach_function :raptor_parse_uri, [:raptor_parser, :raptor_uri, :raptor_uri], :int
    attach_function :raptor_start_parse, [:raptor_parser, :string], :int
    attach_function :raptor_parse_chunk, [:raptor_parser, :string, :size_t, :int], :int
    attach_function :raptor_get_mime_type, [:raptor_parser], :string
    attach_function :raptor_set_parser_strict, [:raptor_parser, :int], :void
    attach_function :raptor_get_need_base_uri, [:raptor_parser], :int
    attach_function :raptor_free_parser, [:raptor_parser], :void

    # @see http://librdf.org/raptor/api-1.4/raptor-section-iostream.html
    typedef :pointer, :raptor_iostream
    attach_function :raptor_new_iostream_from_handler2, [:pointer, :pointer], :raptor_iostream
    # The next two constructors are called by `V1::IOStream#initialize` (for
    # file-backed streams and for the `false` sink case respectively), so
    # they have to be bound here.
    attach_function :raptor_new_iostream_to_filename, [:string], :raptor_iostream
    attach_function :raptor_new_iostream_to_sink, [], :raptor_iostream
    attach_function :raptor_free_iostream, [:raptor_iostream], :void
    callback :raptor_iostream_init_func, [:pointer], :int
    callback :raptor_iostream_finish_func, [:pointer], :void
    callback :raptor_iostream_write_byte_func, [:pointer, :int], :int
    callback :raptor_iostream_write_bytes_func, [:pointer, :pointer, :size_t, :size_t], :int
    callback :raptor_iostream_write_end_func, [:pointer], :void
    callback :raptor_iostream_read_bytes_func, [:pointer, :pointer, :size_t, :size_t], :int
    callback :raptor_iostream_read_eof_func, [:pointer], :int

    # @see http://librdf.org/raptor/api-1.4/raptor-section-xml-namespace.html
    typedef :pointer, :raptor_namespace

    # @see http://librdf.org/raptor/api-1.4/raptor-section-serializer.html
    typedef :pointer, :raptor_serializer
    attach_function :raptor_new_serializer, [:string], :raptor_serializer
    attach_function :raptor_free_serializer, [:raptor_serializer], :void
    attach_function :raptor_serialize_start_to_iostream, [:raptor_serializer, :raptor_uri, :raptor_iostream], :int
    attach_function :raptor_serialize_start_to_filename, [:raptor_serializer, :string], :int
    attach_function :raptor_serialize_statement, [:raptor_serializer, :raptor_statement], :int
    attach_function :raptor_serialize_end, [:raptor_serializer], :int
    attach_function :raptor_serializer_set_error_handler, [:raptor_serializer, :pointer, :raptor_message_handler], :void
    attach_function :raptor_serializer_set_warning_handler, [:raptor_serializer, :pointer, :raptor_message_handler], :void

    # Initialize the world.
    # We do this exactly once and never release because we can't delegate
    # any memory management to the Ruby GC.
    # Internally `raptor_init`/`raptor_finish` work with reference counts.
    raptor_init

    ##
    # Allocates memory for the string `str` inside `libraptor`, copying the
    # string into the newly-allocated buffer.
    #
    # The buffer is sized `str.bytesize + 1`, leaving room for the
    # terminating NUL byte written after the string data.
    #
    # The buffer should later be deallocated using `raptor_free_string`.
    #
    # @param  [String] str
    # @return [FFI::Pointer]
    def raptor_new_string(str)
      ptr = V1.raptor_alloc_memory(str.bytesize + 1)
      ptr.put_string(0, str)
      ptr
    end
    module_function :raptor_new_string

    # `raptor_free_string` is simply another name for `raptor_free_memory`.
    alias_method :raptor_free_string, :raptor_free_memory
    module_function :raptor_free_string
  end # V1
end # RDF::Raptor::FFI
@@ -0,0 +1,47 @@
1
module RDF::Raptor::FFI::V1
  ##
  # This class provides an I/O stream that can write to filenames, `FILE*`,
  # strings and user-defined output via callbacks.
  #
  # Instances wrap a native `raptor_iostream` pointer; `.release` hands that
  # pointer back to `raptor_free_iostream`.
  #
  # @see http://librdf.org/raptor/api-1.4/raptor-section-iostream.html
  class IOStream < ::FFI::ManagedStruct
    include RDF::Raptor::FFI
    layout :user_data, :pointer # the actual layout is private

    ##
    # Releases `libraptor` memory associated with this structure.
    #
    # @param  [FFI::Pointer] ptr
    # @return [void]
    def self.release(ptr)
      V1.raptor_free_iostream(ptr)
    end

    ##
    # Wraps an existing stream pointer, or creates a new native stream for
    # the given handler or file. Passing `false` creates a sink stream.
    #
    # @overload initialize(ptr)
    #   @param  [FFI::Pointer] ptr
    #
    # @overload initialize(handler)
    #   @param  [V1::IOStreamHandler] handler
    #
    # @overload initialize(file)
    #   @param  [File, Tempfile] file
    #
    # @param  [Hash] options
    #   accepted but not read by this method
    # @raise  [ArgumentError]
    #   if the argument is of an unsupported kind, or the resulting native
    #   pointer is null
    def initialize(ptr_or_obj, options = {})
      native =
        case ptr_or_obj
        when FFI::Pointer
          ptr_or_obj
        when V1::IOStreamHandler
          # Keep the handler referenced from this object so that it is not
          # garbage-collected prematurely.
          @handler = ptr_or_obj
          V1.raptor_new_iostream_from_handler2(self, @handler)
        when File, Tempfile
          absolute_path = File.expand_path(ptr_or_obj.path)
          V1.raptor_new_iostream_to_filename(absolute_path)
        when false
          V1.raptor_new_iostream_to_sink
        end

      if native.nil? || native.null?
        raise ArgumentError, "invalid argument: #{ptr_or_obj.inspect}"
      end

      super(native)
    end
  end # IOStream
end # RDF::Raptor::FFI::V1
@@ -0,0 +1,151 @@
1
module RDF::Raptor::FFI::V1
  ##
  # A structure of callback slots that `libraptor` invokes to perform I/O
  # on behalf of a stream. By default the `write_byte` and `write_bytes`
  # slots are wired to the Ruby IO object held in {#io}.
  #
  # @see http://librdf.org/raptor/api-1.4/raptor-section-iostream.html
  class IOStreamHandler < ::FFI::Struct
    include RDF::Raptor::FFI
    layout :version, :int,
           :init, :raptor_iostream_init_func,
           :finish, :raptor_iostream_finish_func,
           :write_byte, :raptor_iostream_write_byte_func,
           :write_bytes, :raptor_iostream_write_bytes_func,
           :write_end, :raptor_iostream_write_end_func,
           :read_bytes, :raptor_iostream_read_bytes_func,
           :read_eof, :raptor_iostream_read_eof_func

    # Names of the callback slots that {#define_handler} accepts. This must
    # list every callback field of the layout above; `:write_end` was
    # previously missing, which made {#write_end_handler=} always raise.
    HANDLERS = [:init, :finish, :write_byte, :write_bytes, :write_end, :read_bytes, :read_eof].freeze

    ##
    # The IO object to operate upon.
    #
    # @return [IO]
    attr_accessor :io

    ##
    # @overload initialize(ptr)
    #   @param  [FFI::Pointer] ptr
    #
    # @overload initialize(io)
    #   @param  [IO, StringIO] io
    #
    # @raise  [ArgumentError] if the argument is of any other kind
    def initialize(ptr_or_io = nil)
      ptr = case ptr_or_io
        when FFI::Pointer
          ptr_or_io
        when IO, StringIO
          @io = ptr_or_io
          nil # no existing memory to wrap; `super(nil)` is called below
        when nil then nil
        else
          raise ArgumentError, "invalid argument: #{ptr_or_io.inspect}"
      end
      super(ptr)
      initialize!
    end

    ##
    # Sets the structure version to 2 and installs the default
    # `write_byte` and `write_bytes` callbacks, which forward to {#io}.
    #
    # Both callbacks return 0 on success. Any `StandardError` raised while
    # writing is reported on `$stderr` and turned into a return value of 1.
    #
    # The remaining slots (`init`, `finish`, `write_end`, `read_bytes`,
    # `read_eof`) are left unset here; use the `*_handler=` writers or
    # {#define_handler} to install them.
    #
    # @return [void]
    def initialize!
      self[:version] = 2

      define_handler(:write_byte) do |context, byte|
        begin
          @io.putc(byte)
          0
        rescue => e
          $stderr.puts("#{e} in #{self.class}#write_byte")
          1
        end
      end

      define_handler(:write_bytes) do |context, data, size, nmemb|
        begin
          # The callback receives an element size and an element count;
          # the number of bytes to read is their product.
          @io.write(data.read_string(size * nmemb))
          0
        rescue => e
          $stderr.puts("#{e} in #{self.class}#write_bytes")
          1
        end
      end
    end

    ##
    # @param  [Proc] func
    # @return [void]
    def init_handler=(func)
      define_handler(:init, &func)
    end
    alias_method :init=, :init_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def finish_handler=(func)
      define_handler(:finish, &func)
    end
    alias_method :finish=, :finish_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def write_byte_handler=(func)
      define_handler(:write_byte, &func)
    end
    alias_method :write_byte=, :write_byte_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def write_bytes_handler=(func)
      define_handler(:write_bytes, &func)
    end
    alias_method :write_bytes=, :write_bytes_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def write_end_handler=(func)
      define_handler(:write_end, &func)
    end
    alias_method :write_end=, :write_end_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def read_bytes_handler=(func)
      define_handler(:read_bytes, &func)
    end
    alias_method :read_bytes=, :read_bytes_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def read_eof_handler=(func)
      define_handler(:read_eof, &func)
    end
    alias_method :read_eof=, :read_eof_handler=

    ##
    # Installs `block` as the callback for the slot called `name`.
    #
    # @param  [Symbol, #to_sym] name
    #   one of {HANDLERS}
    # @raise  [ArgumentError] if `name` is not listed in {HANDLERS}
    # @return [void]
    def define_handler(name, &block)
      name = name.to_sym
      raise ArgumentError, "invalid IOStreamHandler function name: #{name}" unless HANDLERS.include?(name)
      @procs ||= {} # prevents premature GC of the procs
      @procs[name] = self[name] = block
    end
  end # IOStreamHandler
end # RDF::Raptor::FFI::V1
@@ -0,0 +1,205 @@
1
module RDF::Raptor::FFI::V1
  ##
  # This class provides the functionality of turning syntaxes into RDF
  # triples - RDF parsing.
  #
  # @see http://librdf.org/raptor/api-1.4/raptor-section-parser.html
  class Parser < ::FFI::ManagedStruct
    include RDF::Raptor::FFI
    layout :world, :pointer # the actual layout is private

    # The default base URI
    BASE_URI = 'file:///dev/stdin'

    # The maximum chunk size for `#parse_stream`
    BUFFER_SIZE = 64 * 1024

    ##
    # @overload initialize(ptr)
    #   @param  [FFI::Pointer] ptr
    #
    # @overload initialize(name)
    #   @param  [Symbol, String] name
    #
    # @raise  [ArgumentError]
    #   if the argument is of an unsupported kind, or the resulting native
    #   pointer is null
    def initialize(ptr_or_name)
      ptr = case ptr_or_name
        when FFI::Pointer then ptr_or_name
        when Symbol       then V1.raptor_new_parser(ptr_or_name.to_s)
        when String       then V1.raptor_new_parser(ptr_or_name)
        else nil
      end
      raise ArgumentError, "invalid argument: #{ptr_or_name.inspect}" if ptr.nil? || ptr.null?
      super(ptr)
    end

    ##
    # Releases `libraptor` memory associated with this structure.
    #
    # @param  [FFI::Pointer] ptr
    # @return [void]
    def self.release(ptr)
      V1.raptor_free_parser(ptr)
    end

    ##
    # Registers the callback invoked for parse errors.
    #
    # @param  [Proc] handler
    # @return [void]
    def error_handler=(handler)
      # Keep a reference for as long as this parser lives, so the proc is
      # not garbage-collected while native code may still call it.
      @error_handler = handler
      V1.raptor_set_error_handler(self, self, handler)
    end

    ##
    # Registers the callback invoked for parse warnings.
    #
    # @param  [Proc] handler
    # @return [void]
    def warning_handler=(handler)
      @warning_handler = handler # prevents premature GC of the proc
      V1.raptor_set_warning_handler(self, self, handler)
    end

    ##
    # Registers the callback invoked for each parsed statement.
    #
    # @param  [Proc] handler
    # @return [void]
    def statement_handler=(handler)
      @statement_handler = handler # prevents premature GC of the proc
      V1.raptor_set_statement_handler(self, self, handler)
    end

    ##
    # Parses `input`, dispatching on its kind: URLs go to {#parse_url},
    # files to {#parse_file}, other IO objects to {#parse_stream}, and any
    # remaining strings to {#parse_buffer}.
    #
    # @param  [Object] input
    #   the input to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing
    # @option options [String, #to_s] :base_uri (nil)
    #   the base URI to use when resolving relative URIs
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @raise  [ArgumentError] if `input` is of an unsupported kind
    # @return [void]
    def parse(input, options = {}, &block)
      case input
        # `\A` anchors at the very start of the string. With `^`, a
        # multi-line buffer containing a line that begins with a URL scheme
        # would be mistaken for a URL.
        when RDF::URI, %r(\A(file|https|http|ftp)://)
          parse_url(input, options, &block)
        when File, Tempfile
          parse_file(input, options, &block)
        when IO, StringIO
          parse_stream(input, options, &block)
        when String
          parse_buffer(input, options, &block)
        else
          raise ArgumentError, "don't know how to parse #{input.inspect}"
      end
    end

    ##
    # @param  [RDF::URI, String, #to_s] url
    #   the input URL to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing (see {#parse})
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @return [Integer] the value returned by `raptor_parse_uri`
    def parse_url(url, options = {}, &block)
      self.statement_handler = block if block_given?

      data_url = V1::URI.new((url.respond_to?(:to_uri) ? url.to_uri : url).to_s)
      # A missing or empty :base_uri option is passed on as nil.
      base_uri = options[:base_uri].to_s.empty? ? nil : V1::URI.new(options[:base_uri].to_s)

      result = V1.raptor_parse_uri(self, data_url, base_uri)
      # TODO: error handling if result.nonzero?
    end
    alias_method :parse_uri, :parse_url

    ##
    # @param  [File, Tempfile, #path] file
    #   the input file to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing (see {#parse})
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @return [Integer] the value returned by `raptor_parse_file`
    def parse_file(file, options = {}, &block)
      self.statement_handler = block if block_given?

      data_url = V1::URI.new("file://#{File.expand_path(file.path)}")
      # A missing or empty :base_uri option is passed on as nil.
      base_uri = options[:base_uri].to_s.empty? ? nil : V1::URI.new(options[:base_uri].to_s)

      result = V1.raptor_parse_file(self, data_url, base_uri)
      # TODO: error handling if result.nonzero?
    end

    ##
    # Reads the stream in chunks of at most {BUFFER_SIZE} bytes, feeding
    # each chunk to the parser until the stream raises `EOFError`.
    #
    # @param  [IO, StringIO, #readpartial] stream
    #   the input stream to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing (see {#parse})
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @return [void]
    def parse_stream(stream, options = {}, &block)
      self.statement_handler = block if block_given?

      begin
        parse_start!((options[:base_uri] || BASE_URI).to_s)
        loop do
          parse_chunk(stream.readpartial(BUFFER_SIZE))
        end
      rescue EOFError
        # End of input is the normal way out of the loop above.
        parse_end!
      end
    end

    ##
    # @param  [String, #to_str] buffer
    #   the input buffer to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing (see {#parse})
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @return [void]
    def parse_buffer(buffer, options = {}, &block)
      self.statement_handler = block if block_given?

      parse_start!((options[:base_uri] || BASE_URI).to_s)
      parse_chunk(buffer.to_str)
      parse_end!
    end

    ##
    # @private
    # @param  [String] base_uri
    # @return [Integer] the value returned by `raptor_start_parse`
    def parse_start!(base_uri = BASE_URI)
      result = V1.raptor_start_parse(self, base_uri)
      # TODO: error handling if result.nonzero?
    end

    ##
    # @private
    # @param  [String] buffer
    #   the input chunk to parse
    # @return [Integer] the value returned by `raptor_parse_chunk`
    def parse_chunk(buffer)
      # The length is given in bytes; the final 0 means "not the last chunk".
      result = V1.raptor_parse_chunk(self, buffer, buffer.bytesize, 0)
      # TODO: error handling if result.nonzero?
    end

    ##
    # Signals end of input by sending an empty final chunk.
    #
    # @private
    # @return [Integer] the value returned by `raptor_parse_chunk`
    def parse_end!
      result = V1.raptor_parse_chunk(self, nil, 0, 1) # EOF
    end
  end # Parser
end # RDF::Raptor::FFI::V1