rdf-raptor 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,130 @@
1
module RDF::Raptor::FFI
  ##
  # A foreign-function interface (FFI) to `libraptor` 1.4.x.
  #
  # The module attaches the `libraptor` C functions, variables and callback
  # signatures that the wrapper classes autoloaded below call into.
  #
  # @see http://librdf.org/raptor/libraptor.html
  module V1
    autoload :IOStream, 'rdf/raptor/ffi/v1/iostream'
    autoload :IOStreamHandler, 'rdf/raptor/ffi/v1/iostream_handler'
    autoload :Parser, 'rdf/raptor/ffi/v1/parser'
    autoload :Serializer, 'rdf/raptor/ffi/v1/serializer'
    autoload :Statement, 'rdf/raptor/ffi/v1/statement'
    autoload :URI, 'rdf/raptor/ffi/v1/uri'

    extend ::FFI::Library
    ffi_lib RDF::Raptor::LIBRAPTOR

    # TODO: Ideally this would be an enum, but the JRuby FFI (as of
    # version 1.4.0) has problems with enums as part of structs:
    # `Unknown field type: #<FFI::Enum> (ArgumentError)`
    RAPTOR_IDENTIFIER_TYPE_UNKNOWN = 0
    RAPTOR_IDENTIFIER_TYPE_RESOURCE = 1
    RAPTOR_IDENTIFIER_TYPE_ANONYMOUS = 2
    RAPTOR_IDENTIFIER_TYPE_LITERAL = 5

    # @see http://librdf.org/raptor/api-1.4/tutorial-initialising-finishing.html
    attach_function :raptor_init, [], :void
    attach_function :raptor_finish, [], :void
    attach_function :raptor_alloc_memory, [:size_t], :pointer
    attach_function :raptor_calloc_memory, [:size_t, :size_t], :pointer
    attach_function :raptor_free_memory, [:pointer], :void

    # @see http://librdf.org/raptor/api-1.4/raptor-section-locator.html
    typedef :pointer, :raptor_locator
    attach_function :raptor_locator_line, [:raptor_locator], :int
    attach_function :raptor_locator_column, [:raptor_locator], :int
    attach_function :raptor_locator_byte, [:raptor_locator], :int

    # @see http://librdf.org/raptor/api-1.4/raptor-section-general.html
    attach_variable :raptor_version_major, :int
    attach_variable :raptor_version_minor, :int
    attach_variable :raptor_version_release, :int
    attach_variable :raptor_version_decimal, :int
    callback :raptor_message_handler, [:pointer, :raptor_locator, :string], :void

    # @see http://librdf.org/raptor/api-1.4/raptor-section-uri.html
    typedef :pointer, :raptor_uri
    attach_function :raptor_new_uri, [:string], :raptor_uri
    attach_function :raptor_uri_copy, [:raptor_uri], :raptor_uri
    attach_function :raptor_uri_equals, [:raptor_uri, :raptor_uri], :int
    attach_function :raptor_uri_as_string, [:raptor_uri], :string
    # NOTE(review): libraptor appears to document `raptor_uri_to_string` as
    # returning a newly allocated string; mapping it to `:string` leaves the
    # caller without a pointer to free -- confirm whether this leaks.
    attach_function :raptor_uri_to_string, [:raptor_uri], :string
    attach_function :raptor_uri_print, [:raptor_uri, :pointer], :void
    attach_function :raptor_free_uri, [:raptor_uri], :void

    # @see http://librdf.org/raptor/api-1.4/raptor-section-triples.html
    typedef :int, :raptor_identifier_type
    typedef :pointer, :raptor_identifier
    typedef :pointer, :raptor_statement
    attach_function :raptor_statement_compare, [:raptor_statement, :raptor_statement], :int
    attach_function :raptor_print_statement, [:raptor_statement, :pointer], :void
    attach_function :raptor_print_statement_as_ntriples, [:pointer, :pointer], :void
    attach_function :raptor_statement_part_as_string, [:pointer, :raptor_identifier_type, :raptor_uri, :pointer], :string

    # @see http://librdf.org/raptor/api-1.4/raptor-section-parser.html
    callback :raptor_statement_handler, [:pointer, :raptor_statement], :void
    typedef :pointer, :raptor_parser
    attach_function :raptor_new_parser, [:string], :raptor_parser
    attach_function :raptor_set_error_handler, [:raptor_parser, :pointer, :raptor_message_handler], :void
    attach_function :raptor_set_warning_handler, [:raptor_parser, :pointer, :raptor_message_handler], :void
    attach_function :raptor_set_statement_handler, [:raptor_parser, :pointer, :raptor_statement_handler], :void
    attach_function :raptor_parse_file, [:raptor_parser, :raptor_uri, :raptor_uri], :int
    attach_function :raptor_parse_file_stream, [:raptor_parser, :pointer, :string, :raptor_uri], :int
    attach_function :raptor_parse_uri, [:raptor_parser, :raptor_uri, :raptor_uri], :int
    attach_function :raptor_start_parse, [:raptor_parser, :string], :int
    attach_function :raptor_parse_chunk, [:raptor_parser, :string, :size_t, :int], :int
    attach_function :raptor_get_mime_type, [:raptor_parser], :string
    attach_function :raptor_set_parser_strict, [:raptor_parser, :int], :void
    attach_function :raptor_get_need_base_uri, [:raptor_parser], :int
    attach_function :raptor_free_parser, [:raptor_parser], :void

    # @see http://librdf.org/raptor/api-1.4/raptor-section-iostream.html
    typedef :pointer, :raptor_iostream
    attach_function :raptor_new_iostream_from_handler2, [:pointer, :pointer], :raptor_iostream
    # `V1::IOStream#initialize` calls the two constructors below for its
    # file and sink cases, so they have to be attached here as well.
    attach_function :raptor_new_iostream_to_filename, [:string], :raptor_iostream
    attach_function :raptor_new_iostream_to_sink, [], :raptor_iostream
    attach_function :raptor_free_iostream, [:raptor_iostream], :void
    callback :raptor_iostream_init_func, [:pointer], :int
    callback :raptor_iostream_finish_func, [:pointer], :void
    callback :raptor_iostream_write_byte_func, [:pointer, :int], :int
    callback :raptor_iostream_write_bytes_func, [:pointer, :pointer, :size_t, :size_t], :int
    callback :raptor_iostream_write_end_func, [:pointer], :void
    callback :raptor_iostream_read_bytes_func, [:pointer, :pointer, :size_t, :size_t], :int
    callback :raptor_iostream_read_eof_func, [:pointer], :int

    # @see http://librdf.org/raptor/api-1.4/raptor-section-xml-namespace.html
    typedef :pointer, :raptor_namespace

    # @see http://librdf.org/raptor/api-1.4/raptor-section-serializer.html
    typedef :pointer, :raptor_serializer
    attach_function :raptor_new_serializer, [:string], :raptor_serializer
    attach_function :raptor_free_serializer, [:raptor_serializer], :void
    attach_function :raptor_serialize_start_to_iostream, [:raptor_serializer, :raptor_uri, :raptor_iostream], :int
    attach_function :raptor_serialize_start_to_filename, [:raptor_serializer, :string], :int
    attach_function :raptor_serialize_statement, [:raptor_serializer, :raptor_statement], :int
    attach_function :raptor_serialize_end, [:raptor_serializer], :int
    attach_function :raptor_serializer_set_error_handler, [:raptor_serializer, :pointer, :raptor_message_handler], :void
    attach_function :raptor_serializer_set_warning_handler, [:raptor_serializer, :pointer, :raptor_message_handler], :void

    # Initialize the world.
    # We do this exactly once and never release because we can't delegate
    # any memory management to the Ruby GC.
    # Internally `raptor_init`/`raptor_finish` work with reference counts.
    raptor_init

    ##
    # Allocates memory for the string `str` inside `libraptor`, copying the
    # string into the newly-allocated buffer.
    #
    # The buffer should later be deallocated using `raptor_free_string`.
    #
    # @param  [String] str
    #   the string to copy; `str.bytesize + 1` bytes are requested so that
    #   the terminator fits as well
    # @return [FFI::Pointer]
    def raptor_new_string(str)
      ptr = V1.raptor_alloc_memory(str.bytesize + 1)
      ptr.put_string(0, str)
      ptr
    end
    module_function :raptor_new_string

    # Counterpart of `raptor_new_string`: releases the buffer through
    # `raptor_free_memory`.
    alias_method :raptor_free_string, :raptor_free_memory
    module_function :raptor_free_string
  end # V1
end # RDF::Raptor::FFI
@@ -0,0 +1,47 @@
1
module RDF::Raptor::FFI::V1
  ##
  # Wraps a `libraptor` I/O stream. A stream can be built from an existing
  # native pointer, from a callback-driven {IOStreamHandler}, from a file
  # (addressed by its expanded path), or as a sink when `false` is given.
  #
  # @see http://librdf.org/raptor/api-1.4/raptor-section-iostream.html
  class IOStream < ::FFI::ManagedStruct
    include RDF::Raptor::FFI
    layout :user_data, :pointer # the actual layout is private

    ##
    # @overload initialize(ptr)
    #   @param  [FFI::Pointer] ptr
    #     an already-created native stream
    #
    # @overload initialize(handler)
    #   @param  [V1::IOStreamHandler] handler
    #     the callbacks that back the stream
    #
    # @overload initialize(file)
    #   @param  [File, Tempfile] file
    #     the file whose path the stream is opened on
    #
    # @raise [ArgumentError]
    #   if the argument is of an unsupported kind or no stream was obtained
    def initialize(ptr_or_obj, options = {})
      stream = case ptr_or_obj
        when FFI::Pointer then ptr_or_obj
        when V1::IOStreamHandler
          # Hold on to the handler so that it is not garbage-collected
          # prematurely.
          @handler = ptr_or_obj
          V1.raptor_new_iostream_from_handler2(self, @handler)
        when File, Tempfile
          absolute_path = File.expand_path(ptr_or_obj.path)
          V1.raptor_new_iostream_to_filename(absolute_path)
        when false then V1.raptor_new_iostream_to_sink()
      end

      # An unmatched argument leaves `stream` nil; a failed native
      # constructor is detected through the null check.
      if stream.nil? || stream.null?
        raise ArgumentError, "invalid argument: #{ptr_or_obj.inspect}"
      end

      super(stream)
    end

    ##
    # Frees the native stream behind `ptr` via `raptor_free_iostream`.
    #
    # @param  [FFI::Pointer] ptr
    # @return [void]
    def self.release(ptr)
      V1.raptor_free_iostream(ptr)
    end
  end # IOStream
end # RDF::Raptor::FFI::V1
@@ -0,0 +1,151 @@
1
module RDF::Raptor::FFI::V1
  ##
  # The table of callbacks that `libraptor` invokes to drive a user-defined
  # I/O stream. By default the write callbacks forward to the Ruby {#io}
  # object.
  #
  # @see http://librdf.org/raptor/api-1.4/raptor-section-iostream.html
  class IOStreamHandler < ::FFI::Struct
    include RDF::Raptor::FFI
    layout :version, :int,
           :init, :raptor_iostream_init_func,
           :finish, :raptor_iostream_finish_func,
           :write_byte, :raptor_iostream_write_byte_func,
           :write_bytes, :raptor_iostream_write_bytes_func,
           :write_end, :raptor_iostream_write_end_func,
           :read_bytes, :raptor_iostream_read_bytes_func,
           :read_eof, :raptor_iostream_read_eof_func

    # The callback slots accepted by {#define_handler}: every function
    # member of the layout above. `:write_end` has to be listed, otherwise
    # {#write_end_handler=} could never succeed.
    HANDLERS = [:init, :finish, :write_byte, :write_bytes, :write_end, :read_bytes, :read_eof].freeze

    ##
    # The IO object to operate upon.
    #
    # @return [IO]
    attr_accessor :io

    ##
    # @overload initialize(ptr)
    #   @param  [FFI::Pointer] ptr
    #
    # @overload initialize(io)
    #   @param  [IO, StringIO] io
    #
    # @raise [ArgumentError] if the argument is of any other kind
    def initialize(ptr_or_io = nil)
      ptr = case ptr_or_io
        when FFI::Pointer
          ptr_or_io
        when IO, StringIO
          @io = ptr_or_io
          nil # no existing pointer to wrap
        when nil then nil
        else
          raise ArgumentError, "invalid argument: #{ptr_or_io.inspect}"
      end
      super(ptr)
      initialize!
    end

    ##
    # Sets the handler version to 2 and installs the default `write_byte`
    # and `write_bytes` callbacks, which forward to {#io}.
    #
    # Both callbacks return 0 on success. When the write raises, they
    # report the error on `$stderr` and return 1 instead of letting the
    # exception propagate.
    #
    # @return [void]
    def initialize!
      self[:version] = 2

      # The init, finish, write_end, read_bytes and read_eof slots get no
      # default callback; install them through the setters below if needed.
      define_handler(:write_byte) do |context, byte|
        begin
          @io.putc(byte)
          0
        rescue => e
          $stderr.puts("#{e} in #{self.class}#write_byte")
          1
        end
      end
      define_handler(:write_bytes) do |context, data, size, nmemb|
        begin
          # `size * nmemb` is the total number of bytes read from `data`.
          @io.write(data.read_string(size * nmemb))
          0
        rescue => e
          $stderr.puts("#{e} in #{self.class}#write_bytes")
          1
        end
      end
    end

    ##
    # @param  [Proc] func
    # @return [void]
    def init_handler=(func)
      define_handler(:init, &func)
    end
    alias_method :init=, :init_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def finish_handler=(func)
      define_handler(:finish, &func)
    end
    alias_method :finish=, :finish_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def write_byte_handler=(func)
      define_handler(:write_byte, &func)
    end
    alias_method :write_byte=, :write_byte_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def write_bytes_handler=(func)
      define_handler(:write_bytes, &func)
    end
    alias_method :write_bytes=, :write_bytes_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def write_end_handler=(func)
      define_handler(:write_end, &func)
    end
    alias_method :write_end=, :write_end_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def read_bytes_handler=(func)
      define_handler(:read_bytes, &func)
    end
    alias_method :read_bytes=, :read_bytes_handler=

    ##
    # @param  [Proc] func
    # @return [void]
    def read_eof_handler=(func)
      define_handler(:read_eof, &func)
    end
    alias_method :read_eof=, :read_eof_handler=

    ##
    # Stores `block` in the struct member `name` and keeps a Ruby-side
    # reference to it.
    #
    # @param  [Symbol, #to_sym] name
    #   one of {HANDLERS}
    # @raise  [ArgumentError] if `name` is not one of {HANDLERS}
    # @return [void]
    def define_handler(name, &block)
      name = name.to_sym
      raise ArgumentError, "invalid IOStreamHandler function name: #{name}" unless HANDLERS.include?(name)
      @procs ||= {} # prevents premature GC of the procs
      @procs[name] = self[name] = block
    end
  end # IOStreamHandler
end # RDF::Raptor::FFI::V1
@@ -0,0 +1,205 @@
1
module RDF::Raptor::FFI::V1
  ##
  # This class provides the functionality of turning syntaxes into RDF
  # triples - RDF parsing.
  #
  # @see http://librdf.org/raptor/api-1.4/raptor-section-parser.html
  class Parser < ::FFI::ManagedStruct
    include RDF::Raptor::FFI
    layout :world, :pointer # the actual layout is private

    # The default base URI
    BASE_URI = 'file:///dev/stdin'

    # The maximum chunk size for `#parse_stream`
    BUFFER_SIZE = 64 * 1024

    ##
    # @overload initialize(ptr)
    #   @param  [FFI::Pointer] ptr
    #
    # @overload initialize(name)
    #   @param  [Symbol, String] name
    #
    # @raise [ArgumentError]
    #   if the argument is of an unsupported kind or no parser was obtained
    def initialize(ptr_or_name)
      ptr = case ptr_or_name
        when FFI::Pointer then ptr_or_name
        when Symbol       then V1.raptor_new_parser(ptr_or_name.to_s)
        when String       then V1.raptor_new_parser(ptr_or_name)
        else nil
      end
      raise ArgumentError, "invalid argument: #{ptr_or_name.inspect}" if ptr.nil? || ptr.null?
      super(ptr)
    end

    ##
    # Releases `libraptor` memory associated with this structure.
    #
    # @param  [FFI::Pointer] ptr
    # @return [void]
    def self.release(ptr)
      V1.raptor_free_parser(ptr)
    end

    ##
    # Registers `handler` as the error callback. The parser itself is
    # passed as the callback's user data.
    #
    # @param  [Proc] handler
    # @return [void]
    def error_handler=(handler)
      @error_handler = handler # prevents premature GC of the proc
      V1.raptor_set_error_handler(self, self, handler)
    end

    ##
    # Registers `handler` as the warning callback. The parser itself is
    # passed as the callback's user data.
    #
    # @param  [Proc] handler
    # @return [void]
    def warning_handler=(handler)
      @warning_handler = handler # prevents premature GC of the proc
      V1.raptor_set_warning_handler(self, self, handler)
    end

    ##
    # Registers `handler` as the statement callback. The parser itself is
    # passed as the callback's user data.
    #
    # @param  [Proc] handler
    # @return [void]
    def statement_handler=(handler)
      @statement_handler = handler # prevents premature GC of the proc
      V1.raptor_set_statement_handler(self, self, handler)
    end

    ##
    # Dispatches to the `parse_*` method matching the kind of `input`.
    #
    # @param  [Object] input
    #   the input to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing
    # @option options [String, #to_s] :base_uri (nil)
    #   the base URI to use when resolving relative URIs
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @raise  [ArgumentError] if `input` is of an unsupported kind
    # @return [void]
    def parse(input, options = {}, &block)
      case input
        # `\A` anchors at the start of the string. With `^`, any *line* of
        # a multi-line buffer beginning with a URL scheme would cause the
        # whole buffer to be mistaken for a URL.
        when RDF::URI, %r(\A(file|https|http|ftp)://)
          parse_url(input, options, &block)
        when File, Tempfile
          parse_file(input, options, &block)
        when IO, StringIO
          parse_stream(input, options, &block)
        when String
          parse_buffer(input, options, &block)
        else
          raise ArgumentError, "don't know how to parse #{input.inspect}"
      end
    end

    ##
    # @param  [RDF::URI, String, #to_s] url
    #   the input URL to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing (see {#parse})
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @return [void]
    def parse_url(url, options = {}, &block)
      self.statement_handler = block if block_given?

      data_url = V1::URI.new((url.respond_to?(:to_uri) ? url.to_uri : url).to_s)
      # A missing or empty :base_uri is passed on as nil.
      base_uri = options[:base_uri].to_s.empty? ? nil : V1::URI.new(options[:base_uri].to_s)

      V1.raptor_parse_uri(self, data_url, base_uri)
      # TODO: error handling if the result is nonzero
    end
    alias_method :parse_uri, :parse_url

    ##
    # @param  [File, Tempfile, #path] file
    #   the input file to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing (see {#parse})
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @return [void]
    def parse_file(file, options = {}, &block)
      self.statement_handler = block if block_given?

      data_url = V1::URI.new("file://#{File.expand_path(file.path)}")
      # A missing or empty :base_uri is passed on as nil.
      base_uri = options[:base_uri].to_s.empty? ? nil : V1::URI.new(options[:base_uri].to_s)

      V1.raptor_parse_file(self, data_url, base_uri)
      # TODO: error handling if the result is nonzero
    end

    ##
    # Feeds the stream to the parser in chunks of at most {BUFFER_SIZE}
    # bytes until `readpartial` raises `EOFError`, then signals the end of
    # the input.
    #
    # @param  [IO, StringIO, #readpartial] stream
    #   the input stream to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing (see {#parse})
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @return [void]
    def parse_stream(stream, options = {}, &block)
      self.statement_handler = block if block_given?

      begin
        parse_start!((options[:base_uri] || BASE_URI).to_s)
        loop do
          parse_chunk(stream.readpartial(BUFFER_SIZE))
        end
      rescue EOFError
        # End of the stream is the only way out of the loop above.
        parse_end!
      end
    end

    ##
    # @param  [String, #to_str] buffer
    #   the input buffer to parse
    # @param  [Hash{Symbol => Object}] options
    #   any additional options for parsing (see {#parse})
    # @yield  [parser, statement]
    #   each statement in the input
    # @yieldparam  [FFI::Pointer] parser
    # @yieldparam  [FFI::Pointer] statement
    # @yieldreturn [void] ignored
    # @return [void]
    def parse_buffer(buffer, options = {}, &block)
      self.statement_handler = block if block_given?

      parse_start!((options[:base_uri] || BASE_URI).to_s)
      parse_chunk(buffer.to_str)
      parse_end!
    end

    ##
    # @private
    # @param  [String] base_uri
    # @return [void]
    def parse_start!(base_uri = BASE_URI)
      V1.raptor_start_parse(self, base_uri)
      # TODO: error handling if the result is nonzero
    end

    ##
    # @private
    # @param  [String] buffer
    #   the input chunk to parse
    # @return [void]
    def parse_chunk(buffer)
      # The final 0 marks this chunk as not being the last one.
      V1.raptor_parse_chunk(self, buffer, buffer.bytesize, 0)
      # TODO: error handling if the result is nonzero
    end

    ##
    # @private
    # @return [void]
    def parse_end!
      V1.raptor_parse_chunk(self, nil, 0, 1) # EOF
    end
  end # Parser
end # RDF::Raptor::FFI::V1