ffi-extractor 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi/extractor/types'
22
+
23
+ require 'ffi'
24
+
25
module FFI
  #
  # Low-level bindings to the functions exported by the native extractor
  # library. The enum, callback and typedef names used in the signatures
  # below are expected to be registered before this file is evaluated
  # (see the `ffi/extractor/types` require at the top of this file).
  #
  module Extractor
    extend FFI::Library

    # Names under which the native library is looked up.
    ffi_lib(['extractor', 'libextractor.so.3'])

    #
    # Metadata type introspection.
    #
    attach_function(
      :EXTRACTOR_metatype_to_string,
      [:extractor_meta_type],
      :string
    )
    attach_function(
      :EXTRACTOR_metatype_to_description,
      [:extractor_meta_type],
      :string
    )
    attach_function(
      :EXTRACTOR_metatype_get_max,
      [],
      :extractor_meta_type
    )

    #
    # Plugin list management. Every function except
    # `EXTRACTOR_plugin_remove_all` returns a plugin list pointer.
    #
    attach_function(
      :EXTRACTOR_plugin_add_defaults,
      [:extractor_policy],
      :extractor_plugin_list
    )
    attach_function(
      :EXTRACTOR_plugin_add,
      [:extractor_plugin_list, :string, :string, :extractor_policy],
      :extractor_plugin_list
    )
    attach_function(
      :EXTRACTOR_plugin_add_config,
      [:extractor_plugin_list, :string, :extractor_policy],
      :extractor_plugin_list
    )
    attach_function(
      :EXTRACTOR_plugin_remove,
      [:extractor_plugin_list, :string],
      :extractor_plugin_list
    )
    attach_function(
      :EXTRACTOR_plugin_remove_all,
      [:extractor_plugin_list],
      :void
    )

    #
    # Extraction and printing of metadata.
    #
    attach_function(
      :EXTRACTOR_extract,
      [
        :extractor_plugin_list,
        :string,
        :pointer,
        :size_t,
        :extractor_meta_data_processor,
        :pointer
      ],
      :void
    )
    attach_function(
      :EXTRACTOR_meta_data_print,
      [
        :pointer,
        :string,
        :extractor_meta_type,
        :extractor_meta_format,
        :string,
        :string,
        :size_t
      ],
      :int
    )
  end
end
@@ -0,0 +1,77 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi'
22
+
23
module FFI
  module Extractor
    #
    # A `Proc` subclass that adapts a Ruby block to the seven-argument
    # metadata processor callback.
    #
    class MetadataProcessor < Proc

      # Lazily filled cache that maps a plugin library path to a short
      # plugin name: the file name without its extension and without the
      # first `libextractor_` occurrence, as a Symbol.
      PLUGIN_NAMES = Hash.new do |cache,path|
        extension = File.extname(path)
        stem      = File.basename(path).chomp(extension)

        cache[path] = stem.sub('libextractor_','').to_sym
      end

      #
      # Builds a callback that decodes the raw metadata and passes it on
      # to the given block.
      #
      # @yield [plugin_name, type, format, mime_type, data]
      #   The given block will be passed the extracted metadata.
      #
      # @yieldparam [Symbol] plugin_name
      #   The short name of the plugin, looked up in {PLUGIN_NAMES}.
      #
      # @yieldparam [Symbol] type
      #   The type of metadata.
      #
      # @yieldparam [:unknown, :utf8, :binary, :c_string] format
      #   The format of the metadata.
      #
      # @yieldparam [String] mime_type
      #   The MIME-type of the data.
      #
      # @yieldparam [String, FFI::Pointer] data
      #   The extracted metadata. For the `:c_string` and `:utf8` formats
      #   it is read with `get_string`, for `:binary` with `get_bytes`.
      #   For any other `format` the original data object is yielded
      #   untouched.
      #
      # @return [Proc]
      #   The wrapped callback. It evaluates to `0` after the block ran
      #   to completion, or to whatever value was thrown with `:return`.
      #
      def self.new(&block)
        super do |cls,plugin,type,format,mime_type,data,size|
          # A `throw :return` from inside the block ends the callback
          # early; the thrown value becomes the callback's result.
          catch(:return) do
            value = if [:c_string, :utf8].include?(format)
                      data.get_string(0,size)
                    elsif :binary == format
                      data.get_bytes(0,size)
                    else
                      data
                    end

            yield PLUGIN_NAMES[plugin], type, format, mime_type, value

            0
          end
        end
      end

    end
  end
end
@@ -0,0 +1,153 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi/extractor/library'
22
+
23
module FFI
  module Extractor
    #
    # Represents the list of loaded extractor plugins.
    #
    # The list is held as a pointer that is handed to, and replaced by the
    # results of, the `EXTRACTOR_plugin_*` functions. A `nil` pointer
    # stands for a list that has no native plugin list behind it.
    #
    class PluginList

      #
      # Initializes the plugin list.
      #
      # @param [FFI::Pointer, nil] ptr
      #   The pointer to the list.
      #
      def initialize(ptr=nil)
        @ptr = ptr
      end

      #
      # Releases the plugin list.
      #
      # @param [FFI::Pointer] ptr
      #   The pointer to the list.
      #
      def self.release(ptr)
        Extractor.EXTRACTOR_plugin_remove_all(ptr)
      end

      #
      # Loads the installed extractor plugins.
      #
      # @param [Symbol] policy
      #   The policy for how the plugins will be run.
      #
      # @return [PluginList]
      #   The loaded plugins.
      #
      # @raise [LoadError]
      #   No plugins were loaded.
      #
      def self.default(policy=:default)
        ptr = Extractor.EXTRACTOR_plugin_add_defaults(policy)

        raise(LoadError,"no plugins were loaded") if ptr.null?

        return new(ptr)
      end

      #
      # Loads a plugin and adds it to the list.
      #
      # @param [Symbol] name
      #   The plugin name.
      #
      # @param [String] options
      #   Options for the plugin.
      #
      # @param [Symbol] policy
      #   The policy for how the plugin will be run.
      #
      # @return [PluginList]
      #   The modified plugin list.
      #
      # @raise [LoadError]
      #   The plugin could not be loaded.
      #
      def add(name,options='',policy=:default)
        name    = name.to_s
        new_ptr = Extractor.EXTRACTOR_plugin_add(@ptr,name,options,policy)

        # an unchanged list pointer is treated as "nothing was added"
        if new_ptr == @ptr
          raise(LoadError,"could not add #{name.dump} to the plugin list")
        end

        @ptr = new_ptr
        return self
      end

      #
      # Removes a plugin from the list.
      #
      # @param [Symbol] name
      #   The plugin name.
      #
      # @return [PluginList]
      #   The modified plugin list.
      #
      # @raise [ArgumentError]
      #   The plugin could not be found in the list.
      #
      def remove(name)
        name    = name.to_s
        new_ptr = Extractor.EXTRACTOR_plugin_remove(@ptr,name)

        # an unchanged list pointer is treated as "nothing was removed"
        #
        # NOTE(review): this assumes the native call returns a different
        # pointer whenever a plugin was removed - confirm that this also
        # holds when the removed plugin is not the first one in the list.
        if new_ptr == @ptr
          raise(ArgumentError,"could not remove #{name.dump} from the plugin list")
        end

        @ptr = new_ptr
        return self
      end

      alias delete remove

      #
      # Removes all plugins from the list.
      #
      # @return [PluginList]
      #   The empty plugin list.
      #
      def remove_all
        Extractor.EXTRACTOR_plugin_remove_all(@ptr)

        # The old list has been handed to the library for removal, so the
        # pointer must not be passed to it again by a later add, remove,
        # remove_all or to_ptr call. Fall back to the same "no list"
        # state that `PluginList.new` starts with.
        @ptr = nil
        return self
      end

      alias clear remove_all

      #
      # Converts the plugin list to a pointer.
      #
      # @return [FFI::Pointer, nil]
      #   The pointer to the plugin list, or `nil` when the list has no
      #   native plugin list behind it.
      #
      def to_ptr
        @ptr
      end

    end
  end
end
@@ -0,0 +1,257 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi'
22
+
23
module FFI
  #
  # Enums, callback signatures and typedefs that the function bindings of
  # this library refer to by name.
  #
  module Extractor
    extend FFI::Library

    # Values of the `extractor_policy` enum.
    enum(:extractor_policy, [
      :default,                   0,
      :out_of_process_no_restart, 1,
      :in_process,                2,
      :disabled,                  3
    ])

    # Values of the `extractor_meta_format` enum.
    enum(:extractor_meta_format, [
      :unknown,  0,
      :utf8,     1,
      :binary,   2,
      :c_string, 3
    ])

    # Values of the `extractor_meta_type` enum. The values are listed
    # explicitly and run from 0 up to the closing `:last` entry without
    # gaps.
    enum(:extractor_meta_type, [
      # fundamental types
      :reserved, 0,
      :mime_type, 1,
      :file_name, 2,
      :comment, 3,

      # Standard types from bibtex
      :title, 4,
      :book_title, 5,
      :book_edition, 6,
      :book_chapter_number, 7,
      :journal_name, 8,
      :journal_volume, 9,
      :journal_number, 10,
      :page_count, 11,
      :page_range, 12,
      :author_name, 13,
      :author_email, 14,
      :author_institution, 15,
      :publisher, 16,
      :publisher_address, 17,
      :publisher_institution, 18,
      :publisher_series, 19,
      :publication_type, 20,
      :publication_year, 21,
      :publication_month, 22,
      :publication_day, 23,
      :publication_date, 24,
      :bibtex_eprint, 25,
      :bibtex_entry_type, 26,
      :language, 27,
      :creation_time, 28,
      :url, 29,

      # "unique" document identifiers
      :uri, 30,
      :isrc, 31,
      :hash_md4, 32,
      :hash_md5, 33,
      :hash_sha0, 34,
      :hash_sha1, 35,
      :hash_rmd160, 36,

      # identifiers of a location
      :gps_latitude_ref, 37,
      :gps_latitude, 38,
      :gps_longitude_ref, 39,
      :gps_longitude, 40,
      :location_city, 41,
      :location_sublocation, 42,
      :location_country, 43,
      :location_country_code, 44,

      # generic attributes
      :unknown, 45,
      :description, 46,
      :copyright, 47,
      :rights, 48,
      :keywords, 49,
      :abstract, 50,
      :summary, 51,
      :subject, 52,
      :creator, 53,
      :format, 54,
      :format_version, 55,

      # processing history
      :created_by_software, 56,
      :unknown_date, 57,
      :creation_date, 58,
      :modification_date, 59,
      :last_printed, 60,
      :last_saved_by, 61,
      :total_editing_time, 62,
      :editing_cycles, 63,
      :modified_by_software, 64,
      :revision_history, 65,

      :embedded_file_size, 66,
      :finder_file_type, 67,
      :finder_file_creator, 68,

      # software package specifics (deb, rpm, tgz, elf)
      :package_name, 69,
      :package_version, 70,
      :section, 71,
      :upload_priority, 72,
      :package_dependency, 73,
      :package_conflicts, 74,
      :package_replaces, 75,
      :package_provides, 76,
      :package_recommends, 77,
      :package_suggests, 78,
      :package_maintainer, 79,
      :package_installed_size, 80,
      :package_source, 81,
      :package_essential, 82,
      :target_architecture, 83,
      :package_pre_dependency, 84,
      :license, 85,
      :package_distribution, 86,
      :buildhost, 87,
      :vendor, 88,
      :target_os, 89,
      :software_version, 90,
      :target_platform, 91,
      :resource_type, 92,
      :library_search_path, 93,
      :library_dependency, 94,

      # photography specifics
      :camera_make, 95,
      :camera_model, 96,
      :exposure, 97,
      :aperture, 98,
      :exposure_bias, 99,
      :flash, 100,
      :flash_bias, 101,
      :focal_length, 102,
      :focal_length_35mm, 103,
      :iso_speed, 104,
      :exposure_mode, 105,
      :metering_mode, 106,
      :macro_mode, 107,
      :image_quality, 108,
      :white_balance, 109,
      :orientation, 110,
      :magnification, 111,

      # image specifics
      :image_dimensions, 112,
      :produced_by_software, 113,
      :thumbnail, 114,
      :image_resolution, 115,
      :source, 116,

      # (text) document processing specifics
      :character_set, 117,
      :line_count, 118,
      :paragraph_count, 119,
      :word_count, 120,
      :character_count, 121,
      :page_orientation, 122,
      :paper_size, 123,
      :template, 124,
      :company, 125,
      :manager, 126,
      :revision_number, 127,

      # music / video specifics
      :duration, 128,
      :album, 129,
      :artist, 130,
      :genre, 131,
      :track_number, 132,
      :disc_number, 133,
      :performer, 134,
      :contact_information, 135,
      :song_version, 136,
      :picture, 137,
      :cover_picture, 138,
      :contributor_picture, 139,
      :event_picture, 140,
      :logo, 141,
      :broadcast_television_system, 142,
      :source_device, 143,
      :disclaimer, 144,
      :warning, 145,
      :page_order, 146,
      :writer, 147,
      :product_version, 148,
      :contributor_name, 149,
      :movie_director, 150,
      :network_name, 151,
      :show_name, 152,
      :chapter_name, 153,
      :song_count, 154,
      :starting_song, 155,
      :play_counter, 156,
      :conductor, 157,
      :interpretation, 158,
      :composer, 159,
      :beats_per_minute, 160,
      :encoded_by, 161,
      :original_title, 162,
      :original_artist, 163,
      :original_writer, 164,
      :original_release_year, 165,
      :original_performer, 166,
      :lyrics, 167,
      :popularity_meter, 168,
      :licensee, 169,
      :musician_credits_list, 170,
      :mood, 171,
      :subtitle, 172,

      # GNUnet specific values (never extracted)
      :gnunet_display_type, 173,
      :gnunet_full_data, 174,
      :rating, 175,
      :organization, 176,
      :ripper, 177,
      :producer, 178,
      :group, 179,

      :last, 180
    ])

    # Signature of the metadata processor callback.
    callback(
      :extractor_meta_data_processor,
      [
        :pointer,
        :string,
        :extractor_meta_type,
        :extractor_meta_format,
        :string,
        :pointer,
        :size_t
      ],
      :int
    )

    # Signature of an extract method; it is handed a metadata processor
    # callback as its third argument.
    callback(
      :extractor_extract_method,
      [
        :string,
        :size_t,
        :extractor_meta_data_processor,
        :pointer,
        :string
      ],
      :int
    )

    # Plugin lists are passed around as plain pointers.
    typedef(:pointer, :extractor_plugin_list)

  end
end
@@ -0,0 +1,26 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
module FFI
  module Extractor
    # ffi-extractor version (frozen, so the constant cannot be modified
    # in place by accident)
    VERSION = "0.1.0".freeze
  end
end
@@ -0,0 +1,23 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi/extractor/library'
22
+ require 'ffi/extractor/extractor'
23
+ require 'ffi/extractor/version'
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+ require 'ffi/extractor'
3
+
4
+ require 'yaml'
5
+
6
describe FFI::Extractor do
  # path of the fixture image
  let(:file) { 'spec/files/image.jpg' }

  # binary contents of the fixture image; the block form of File.open
  # closes the file handle again once the contents have been read
  let(:data) { File.open(file,'rb') { |io| io.read } }

  # the findings that extraction of the fixture image is compared against
  let(:metadata) { YAML.load_file('spec/files/image.meta') }

  it "should have a VERSION constant" do
    subject.const_get('VERSION').should_not be_empty
  end

  describe "abort!" do
    it "should throw :return" do
      lambda { subject.abort! }.should throw_symbol :return
    end
  end

  describe "extract" do
    it "should extract metadata from a String" do
      findings = []

      subject.extract(data) do |*arguments|
        findings << arguments
      end

      findings.should =~ metadata
    end
  end

  describe "extract_from" do
    it "should extract metadata from a file" do
      findings = []

      subject.extract_from(file) do |*arguments|
        findings << arguments
      end

      findings.should =~ metadata
    end
  end
end
Binary file