ffi-extractor 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,44 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi/extractor/types'
22
+
23
+ require 'ffi'
24
+
25
module FFI
  #
  # Attaches the libextractor C functions used by these bindings.
  #
  # The enum, callback and typedef names referenced in the signatures below
  # (`:extractor_meta_type`, `:extractor_policy`, `:extractor_plugin_list`,
  # ...) come from `ffi/extractor/types`.
  #
  module Extractor
    extend FFI::Library

    # The nested Array lists alternative names for the same library.
    ffi_lib ['extractor', 'libextractor.so.3']

    # Metadata type lookups.
    attach_function(:EXTRACTOR_metatype_to_string, [:extractor_meta_type], :string)
    attach_function(:EXTRACTOR_metatype_to_description, [:extractor_meta_type], :string)
    attach_function(:EXTRACTOR_metatype_get_max, [], :extractor_meta_type)

    # Plugin list management. Every function except `remove_all` returns a
    # plugin list pointer.
    attach_function(:EXTRACTOR_plugin_add_defaults,
                    [:extractor_policy],
                    :extractor_plugin_list)
    attach_function(:EXTRACTOR_plugin_add,
                    [:extractor_plugin_list, :string, :string, :extractor_policy],
                    :extractor_plugin_list)
    attach_function(:EXTRACTOR_plugin_add_config,
                    [:extractor_plugin_list, :string, :extractor_policy],
                    :extractor_plugin_list)
    attach_function(:EXTRACTOR_plugin_remove,
                    [:extractor_plugin_list, :string],
                    :extractor_plugin_list)
    attach_function(:EXTRACTOR_plugin_remove_all, [:extractor_plugin_list], :void)

    # Extraction and printing of metadata.
    attach_function(:EXTRACTOR_extract,
                    [:extractor_plugin_list, :string, :pointer, :size_t,
                     :extractor_meta_data_processor, :pointer],
                    :void)
    attach_function(:EXTRACTOR_meta_data_print,
                    [:pointer, :string, :extractor_meta_type,
                     :extractor_meta_format, :string, :string, :size_t],
                    :int)
  end
end
@@ -0,0 +1,77 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi'
22
+
23
module FFI
  module Extractor
    #
    # A `Proc` subclass that adapts a Ruby block to the argument list of the
    # metadata processor callback.
    #
    class MetadataProcessor < Proc

      # Lazily-filled mapping of plugin paths to plugin names. The name is
      # the file's basename without its extension and without the
      # `libextractor_` prefix, as a Symbol.
      PLUGIN_NAMES = Hash.new do |cache, path|
        basename = File.basename(path)
        stem     = basename.chomp(File.extname(path))

        cache[path] = stem.sub('libextractor_','').to_sym
      end

      #
      # Wraps a Metadata Processor callback.
      #
      # @yield [plugin_name, type, format, mime_type, data]
      #   The given block will be passed the extracted metadata.
      #
      # @yieldparam [Symbol] plugin_name
      #   The name of the plugin.
      #
      # @yieldparam [Symbol] type
      #   The type of metadata.
      #
      # @yieldparam [:unknown, :utf8, :binary, :c_string] format
      #   The format of the metadata.
      #
      # @yieldparam [String] mime_type
      #   The MIME-type of the data.
      #
      # @yieldparam [String, FFI::Pointer] data
      #   The extracted metadata. For the `:utf8` and `:c_string` formats it
      #   is read with `get_string`, for `:binary` with `get_bytes`. For any
      #   other `format` (such as `:unknown`) the original `data` object is
      #   yielded untouched.
      #
      # @return [Proc]
      #   The wrapped callback. It evaluates to `0` unless `:return` is
      #   thrown from inside the block, in which case it evaluates to the
      #   thrown value.
      #
      def self.new(&block)
        super do |_cls, plugin, type, format, mime_type, data, size|
          catch(:return) do
            payload = case format
                      when :utf8, :c_string
                        data.get_string(0, size)
                      when :binary
                        data.get_bytes(0, size)
                      else
                        data
                      end

            yield PLUGIN_NAMES[plugin], type, format, mime_type, payload

            0
          end
        end
      end

    end
  end
end
@@ -0,0 +1,153 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi/extractor/library'
22
+
23
module FFI
  module Extractor
    #
    # Represents the list of loaded extractor plugins.
    #
    class PluginList

      #
      # Initializes the plugin list.
      #
      # @param [FFI::Pointer, nil] ptr
      #   The pointer to the list. Defaults to `nil`.
      #
      def initialize(ptr=nil)
        @ptr = ptr
      end

      #
      # Releases the plugin list.
      #
      # @param [FFI::Pointer] ptr
      #   The pointer to the list.
      #
      def self.release(ptr)
        Extractor.EXTRACTOR_plugin_remove_all(ptr)
      end

      #
      # Loads the installed extractor plugins.
      #
      # @param [Symbol] policy
      #   The policy for how the plugins will be ran.
      #
      # @return [PluginList]
      #   The loaded plugins.
      #
      # @raise [LoadError]
      #   No plugins were loaded.
      #
      def self.default(policy=:default)
        ptr = Extractor.EXTRACTOR_plugin_add_defaults(policy)

        raise LoadError, "no plugins were loaded" if ptr.null?

        new(ptr)
      end

      #
      # Loads a plugin and adds it to the list.
      #
      # @param [Symbol] name
      #   The plugin name.
      #
      # @param [String] options
      #   Options for the plugin.
      #
      # @param [Symbol] policy
      #   The policy for how the plugin will be ran.
      #
      # @return [PluginList]
      #   The modified plugin list.
      #
      # @raise [LoadError]
      #   The plugin could not be loaded.
      #
      def add(name,options='',policy=:default)
        name    = name.to_s
        new_ptr = Extractor.EXTRACTOR_plugin_add(@ptr,name,options,policy)

        # An unchanged list pointer is treated as "nothing was added".
        if new_ptr == @ptr
          raise LoadError, "could not add #{name.dump} to the plugin list"
        end

        @ptr = new_ptr
        self
      end

      #
      # Removes a plugin from the list.
      #
      # @param [Symbol] name
      #   The plugin name.
      #
      # @return [PluginList]
      #   The modified plugin list.
      #
      # @raise [ArgumentError]
      #   The plugin could not be found in the list.
      #
      def remove(name)
        name    = name.to_s
        new_ptr = Extractor.EXTRACTOR_plugin_remove(@ptr,name)

        # An unchanged list pointer is treated as "nothing was removed".
        # NOTE(review): confirm libextractor never returns the same head
        # pointer after successfully removing a plugin that is not at the
        # head of the list; if it can, this check reports a false failure.
        if new_ptr == @ptr
          raise ArgumentError, "could not remove #{name.dump} from the plugin list"
        end

        @ptr = new_ptr
        self
      end

      alias delete remove

      #
      # Removes all plugins from the list.
      #
      # @return [PluginList]
      #   The empty plugin list.
      #
      def remove_all
        Extractor.EXTRACTOR_plugin_remove_all(@ptr)

        # Forget the pointer that was just handed to the native library for
        # removal, so later calls never pass the stale address back to it.
        @ptr = nil
        self
      end

      alias clear remove_all

      #
      # Converts the plugin list to a pointer.
      #
      # @return [FFI::Pointer, nil]
      #   The pointer to the plugin list, or `nil` when the list was created
      #   without a pointer or has been cleared.
      #
      def to_ptr
        @ptr
      end

    end
  end
end
@@ -0,0 +1,257 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi'
22
+
23
module FFI
  #
  # Enums, callback signatures and typedefs shared by the libextractor
  # bindings.
  #
  module Extractor
    extend FFI::Library

    # Policies controlling how plugins are run.
    enum :extractor_policy, [
      :default,                   0,
      :out_of_process_no_restart, 1,
      :in_process,                2,
      :disabled,                  3
    ]

    # Formats in which a piece of metadata can be delivered.
    enum :extractor_meta_format, [
      :unknown,  0,
      :utf8,     1,
      :binary,   2,
      :c_string, 3
    ]

    # Types of metadata, with their explicit integer values.
    enum :extractor_meta_type, [
      # fundamental types
      :reserved, 0,
      :mime_type, 1,
      :file_name, 2,
      :comment, 3,

      # Standard types from bibtex
      :title, 4,
      :book_title, 5,
      :book_edition, 6,
      :book_chapter_number, 7,
      :journal_name, 8,
      :journal_volume, 9,
      :journal_number, 10,
      :page_count, 11,
      :page_range, 12,
      :author_name, 13,
      :author_email, 14,
      :author_institution, 15,
      :publisher, 16,
      :publisher_address, 17,
      :publisher_institution, 18,
      :publisher_series, 19,
      :publication_type, 20,
      :publication_year, 21,
      :publication_month, 22,
      :publication_day, 23,
      :publication_date, 24,
      :bibtex_eprint, 25,
      :bibtex_entry_type, 26,
      :language, 27,
      :creation_time, 28,
      :url, 29,

      # "unique" document identifiers
      :uri, 30,
      :isrc, 31,
      :hash_md4, 32,
      :hash_md5, 33,
      :hash_sha0, 34,
      :hash_sha1, 35,
      :hash_rmd160, 36,

      # identifiers of a location
      :gps_latitude_ref, 37,
      :gps_latitude, 38,
      :gps_longitude_ref, 39,
      :gps_longitude, 40,
      :location_city, 41,
      :location_sublocation, 42,
      :location_country, 43,
      :location_country_code, 44,

      # generic attributes
      :unknown, 45,
      :description, 46,
      :copyright, 47,
      :rights, 48,
      :keywords, 49,
      :abstract, 50,
      :summary, 51,
      :subject, 52,
      :creator, 53,
      :format, 54,
      :format_version, 55,

      # processing history
      :created_by_software, 56,
      :unknown_date, 57,
      :creation_date, 58,
      :modification_date, 59,
      :last_printed, 60,
      :last_saved_by, 61,
      :total_editing_time, 62,
      :editing_cycles, 63,
      :modified_by_software, 64,
      :revision_history, 65,

      :embedded_file_size, 66,
      :finder_file_type, 67,
      :finder_file_creator, 68,

      # software package specifics (deb, rpm, tgz, elf)
      :package_name, 69,
      :package_version, 70,
      :section, 71,
      :upload_priority, 72,
      :package_dependency, 73,
      :package_conflicts, 74,
      :package_replaces, 75,
      :package_provides, 76,
      :package_recommends, 77,
      :package_suggests, 78,
      :package_maintainer, 79,
      :package_installed_size, 80,
      :package_source, 81,
      :package_essential, 82,
      :target_architecture, 83,
      :package_pre_dependency, 84,
      :license, 85,
      :package_distribution, 86,
      :buildhost, 87,
      :vendor, 88,
      :target_os, 89,
      :software_version, 90,
      :target_platform, 91,
      :resource_type, 92,
      :library_search_path, 93,
      :library_dependency, 94,

      # photography specifics
      :camera_make, 95,
      :camera_model, 96,
      :exposure, 97,
      :aperture, 98,
      :exposure_bias, 99,
      :flash, 100,
      :flash_bias, 101,
      :focal_length, 102,
      :focal_length_35mm, 103,
      :iso_speed, 104,
      :exposure_mode, 105,
      :metering_mode, 106,
      :macro_mode, 107,
      :image_quality, 108,
      :white_balance, 109,
      :orientation, 110,
      :magnification, 111,

      # image specifics
      :image_dimensions, 112,
      :produced_by_software, 113,
      :thumbnail, 114,
      :image_resolution, 115,
      :source, 116,

      # (text) document processing specifics
      :character_set, 117,
      :line_count, 118,
      :paragraph_count, 119,
      :word_count, 120,
      :character_count, 121,
      :page_orientation, 122,
      :paper_size, 123,
      :template, 124,
      :company, 125,
      :manager, 126,
      :revision_number, 127,

      # music / video specifics
      :duration, 128,
      :album, 129,
      :artist, 130,
      :genre, 131,
      :track_number, 132,
      :disc_number, 133,
      :performer, 134,
      :contact_information, 135,
      :song_version, 136,
      :picture, 137,
      :cover_picture, 138,
      :contributor_picture, 139,
      :event_picture, 140,
      :logo, 141,
      :broadcast_television_system, 142,
      :source_device, 143,
      :disclaimer, 144,
      :warning, 145,
      :page_order, 146,
      :writer, 147,
      :product_version, 148,
      :contributor_name, 149,
      :movie_director, 150,
      :network_name, 151,
      :show_name, 152,
      :chapter_name, 153,
      :song_count, 154,
      :starting_song, 155,
      :play_counter, 156,
      :conductor, 157,
      :interpretation, 158,
      :composer, 159,
      :beats_per_minute, 160,
      :encoded_by, 161,
      :original_title, 162,
      :original_artist, 163,
      :original_writer, 164,
      :original_release_year, 165,
      :original_performer, 166,
      :lyrics, 167,
      :popularity_meter, 168,
      :licensee, 169,
      :musician_credits_list, 170,
      :mood, 171,
      :subtitle, 172,

      # GNUnet specific values (never extracted)
      :gnunet_display_type, 173,
      :gnunet_full_data, 174,
      :rating, 175,
      :organization, 176,
      :ripper, 177,
      :producer, 178,
      :group, 179,

      :last, 180
    ]

    # Signature of the callback that receives each piece of metadata.
    callback(:extractor_meta_data_processor,
             [:pointer, :string, :extractor_meta_type, :extractor_meta_format,
              :string, :pointer, :size_t],
             :int)

    # Signature of an extract method callback.
    callback(:extractor_extract_method,
             [:string, :size_t, :extractor_meta_data_processor, :pointer, :string],
             :int)

    # Plugin lists are passed around as plain pointers.
    typedef(:pointer, :extractor_plugin_list)

  end
end
@@ -0,0 +1,26 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
module FFI
  module Extractor
    # The version of the ffi-extractor library.
    VERSION = '0.1.0'
  end
end
@@ -0,0 +1,23 @@
1
+ #
2
+ # ffi-extractor - Ruby FFI bindings for libextractor
3
+ #
4
+ # Copyright (c) 2012 - Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+ require 'ffi/extractor/library'
22
+ require 'ffi/extractor/extractor'
23
+ require 'ffi/extractor/version'
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+ require 'ffi/extractor'
3
+
4
+ require 'yaml'
5
+
6
# Specs for the top-level FFI::Extractor module: the VERSION constant,
# `abort!`, and metadata extraction from a String and from a file path.
describe FFI::Extractor do
  it "should have a VERSION constant" do
    subject.const_get('VERSION').should_not be_empty
  end

  describe "abort!" do
    it "should throw :return" do
      lambda { subject.abort! }.should throw_symbol :return
    end
  end

  let(:file) { 'spec/files/image.jpg' }
  # Read the fixture in binary mode; File.binread closes the file itself,
  # so no File handle is left open.
  let(:data) { File.binread(file) }

  # Expected findings for the fixture image.
  let(:metadata) { YAML.load_file('spec/files/image.meta') }

  describe "extract" do
    it "should extract metadata from a String" do
      findings = []

      subject.extract(data) do |*arguments|
        findings << arguments
      end

      findings.should =~ metadata
    end
  end

  describe "extract_from" do
    it "should extract metadata from a file" do
      findings = []

      subject.extract_from(file) do |*arguments|
        findings << arguments
      end

      findings.should =~ metadata
    end
  end
end
Binary file