oai_talia 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. data/README +81 -0
  2. data/Rakefile +127 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai/client/get_record.rb +15 -0
  7. data/lib/oai/client/header.rb +18 -0
  8. data/lib/oai/client/identify.rb +30 -0
  9. data/lib/oai/client/list_identifiers.rb +12 -0
  10. data/lib/oai/client/list_metadata_formats.rb +12 -0
  11. data/lib/oai/client/list_records.rb +21 -0
  12. data/lib/oai/client/list_sets.rb +19 -0
  13. data/lib/oai/client/metadata_format.rb +12 -0
  14. data/lib/oai/client/record.rb +26 -0
  15. data/lib/oai/client/response.rb +35 -0
  16. data/lib/oai/client.rb +301 -0
  17. data/lib/oai/constants.rb +34 -0
  18. data/lib/oai/exception.rb +75 -0
  19. data/lib/oai/harvester/config.rb +41 -0
  20. data/lib/oai/harvester/harvest.rb +150 -0
  21. data/lib/oai/harvester/logging.rb +70 -0
  22. data/lib/oai/harvester/mailer.rb +17 -0
  23. data/lib/oai/harvester/shell.rb +338 -0
  24. data/lib/oai/harvester.rb +39 -0
  25. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  26. data/lib/oai/provider/metadata_format/oai_europeana.rb +38 -0
  27. data/lib/oai/provider/metadata_format.rb +143 -0
  28. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +134 -0
  29. data/lib/oai/provider/model/activerecord_wrapper.rb +139 -0
  30. data/lib/oai/provider/model.rb +74 -0
  31. data/lib/oai/provider/partial_result.rb +18 -0
  32. data/lib/oai/provider/response/error.rb +16 -0
  33. data/lib/oai/provider/response/get_record.rb +26 -0
  34. data/lib/oai/provider/response/identify.rb +25 -0
  35. data/lib/oai/provider/response/list_identifiers.rb +35 -0
  36. data/lib/oai/provider/response/list_metadata_formats.rb +34 -0
  37. data/lib/oai/provider/response/list_records.rb +34 -0
  38. data/lib/oai/provider/response/list_sets.rb +23 -0
  39. data/lib/oai/provider/response/record_response.rb +70 -0
  40. data/lib/oai/provider/response.rb +161 -0
  41. data/lib/oai/provider/resumption_token.rb +106 -0
  42. data/lib/oai/provider.rb +304 -0
  43. data/lib/oai/set.rb +29 -0
  44. data/lib/oai/xpath.rb +75 -0
  45. data/lib/oai.rb +8 -0
  46. data/lib/test.rb +25 -0
  47. data/test/activerecord_provider/config/connection.rb +5 -0
  48. data/test/activerecord_provider/config/database.yml +6 -0
  49. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  50. data/test/activerecord_provider/database/oaipmhtest +0 -0
  51. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  52. data/test/activerecord_provider/helpers/providers.rb +44 -0
  53. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  54. data/test/activerecord_provider/models/dc_field.rb +7 -0
  55. data/test/activerecord_provider/models/dc_set.rb +6 -0
  56. data/test/activerecord_provider/models/oai_token.rb +3 -0
  57. data/test/activerecord_provider/tc_ar_provider.rb +113 -0
  58. data/test/activerecord_provider/tc_ar_sets_provider.rb +72 -0
  59. data/test/activerecord_provider/tc_caching_paging_provider.rb +55 -0
  60. data/test/activerecord_provider/tc_simple_paging_provider.rb +57 -0
  61. data/test/activerecord_provider/test_helper.rb +4 -0
  62. data/test/client/helpers/provider.rb +68 -0
  63. data/test/client/helpers/test_wrapper.rb +11 -0
  64. data/test/client/tc_exception.rb +36 -0
  65. data/test/client/tc_get_record.rb +37 -0
  66. data/test/client/tc_identify.rb +13 -0
  67. data/test/client/tc_libxml.rb +61 -0
  68. data/test/client/tc_list_identifiers.rb +52 -0
  69. data/test/client/tc_list_metadata_formats.rb +18 -0
  70. data/test/client/tc_list_records.rb +13 -0
  71. data/test/client/tc_list_sets.rb +19 -0
  72. data/test/client/tc_low_resolution_dates.rb +14 -0
  73. data/test/client/tc_utf8_escaping.rb +11 -0
  74. data/test/client/tc_xpath.rb +26 -0
  75. data/test/client/test_helper.rb +5 -0
  76. data/test/provider/models.rb +234 -0
  77. data/test/provider/tc_exceptions.rb +96 -0
  78. data/test/provider/tc_functional_tokens.rb +43 -0
  79. data/test/provider/tc_provider.rb +71 -0
  80. data/test/provider/tc_resumption_tokens.rb +46 -0
  81. data/test/provider/tc_simple_provider.rb +92 -0
  82. data/test/provider/test_helper.rb +36 -0
  83. data/test/test.xml +22 -0
  84. metadata +181 -0
@@ -0,0 +1,338 @@
1
+ module OAI
2
+ module Harvester
3
+ # = OAI::Harvester::Shell
4
+ #
5
+ # An OAI-PMH client shell allowing OAI Harvesting to be configured in
6
+ # an interactive manner. Typing 'oai' on the command line starts the
7
+ # shell. The first time the shell is run it will prompt for the following
8
+ # configuration details:
9
+ # 1. A storage directory for all harvested records. Harvests will be
10
+ # stored under this directory in a directory structure based on the
11
+ # date of the harvest.
12
+ # 2. A log file directory.
13
+ # 3. Email address(es) for sending daily harvesting activity reports.
14
+ # 4. Network address of the SMTP server for sending mail.
15
+ #
16
+ # After the initial configuration, new harvest sites can be added by using
17
+ # the 'new' command. Sites are identified via nickname assigned by the
18
+ # user. After choosing a nickname, provide the URL of a harvestable site,
19
+ # and the shell will prompt you for the rest of the configuration
20
+ # information.
21
+ #
22
+ # The shell automatically pulls down the list of sets in the repository and
23
+ # the supported metadata prefixes, making it very simple to set up harvests.
24
+ #
25
+ class Shell
26
+ include Readline
27
+
28
# Wraps the given harvester configuration.
#
# config - configuration object responding to +sites+ and +sites=+. When it
#          has no site table yet, an empty Hash is installed.
def initialize(config)
  @conf = config
  @conf.sites = {} unless @conf.sites
end
32
+
33
# Runs the interactive command loop.
#
# When no storage directory is configured yet, the configuration dialog and
# the scheduling instructions are shown first. Each input line is then split
# into a command word and an optional argument string, and dispatched to the
# method of the same name. The loop ends on 'exit' or on end-of-input, for
# which readline returns nil.
def start
  unless @conf.storage
    banner "Entering first-time setup"
    config
    setup_cron
  end
  puts "type 'help' for help"
  loop do
    input = readline("oai> ", true)
    # nil (EOF) used to be treated as an unknown command, which made the
    # loop spin forever.
    break if input.nil? || input.strip == 'exit'

    command, *arguments = input.split
    next unless command # blank line: just prompt again

    begin
      # Check up front instead of rescuing NoMethodError, so that a
      # NoMethodError raised inside a command is reported as an error and
      # not mistaken for an unknown command.
      if !respond_to?(command, true)
        puts "Not a recognized command. Type 'help' for clues."
      elsif arguments.empty?
        send(command)
      else
        send(command, arguments.join(" "))
      end
    rescue StandardError => e
      puts "An error occurred:"
      puts e
      puts e.backtrace.join("\n")
    end
  end
end
57
+
58
+ private
59
+
60
# Prints the command overview: a banner followed by one line per command.
def help
  banner "Commands:"
  usage = [
    "\tharvest site [date] - Harvest site(s) manually",
    "\tconfig - Configure harvester",
    "\tlist <config> - List known providers or configuration",
    "\tinfo [site[, site]] - Show information about a provider.",
    "\tnew - Add a new provider site to harvester",
    "\tremove [site] - Remove a provider site from harvester",
    "\tedit [site] - Change settings for a provider site",
    "\texit - Exit the harvester shell.\n\n"
  ]
  puts usage
end
71
+
72
# Manually harvests one configured site.
#
# options - the argument string typed after 'harvest': a site nickname,
#           optionally followed by a start date that Time.parse understands.
#
# Prints a message and returns without harvesting when the site is unknown
# or the date cannot be parsed.
def harvest(options)
  site, *date_words = options.to_s.split
  if @conf.sites.key?(site)
    banner "Harvesting '#{site}'"
    from = nil
    unless date_words.empty?
      begin
        from = Time.parse(date_words.join(' ')).utc
      rescue ArgumentError, NoMethodError
        # Time.parse signals unparseable input with ArgumentError;
        # NoMethodError is kept for the case where the 'time' library,
        # which defines Time.parse, has not been loaded.
        puts "Couldn't parse the date supplied"
        return
      end
    end
    harvester = Harvest.new(@conf, @conf.storage, from)
    harvester.start(site, true)
    puts "done"
  else
    # The original interpolated an undefined local (args) here.
    puts "Unknown repository: '#{site}'"
  end
  puts # blank line
end
94
+
95
# Shows either the harvester settings or the configured site nicknames.
#
# args - 'config' prints the current configuration via list_config; any
#        other value (or none) prints one configured nickname per line.
def list(args = nil)
  case args
  when 'config'
    banner "Current Configuration"
    list_config
  else
    banner "Configured Repositories"
    @conf.sites.each_key { |nickname| puts nickname }
  end
  puts # blank line
end
107
+
108
# Prints the stored settings of one or more provider sites.
#
# args - site nicknames separated by commas and/or whitespace. When omitted
#        or blank, every configured site is shown (the help text documents
#        the argument as optional).
#
# Unknown nicknames are reported individually; the remaining sites are still
# printed.
def info(args = nil)
  banner "Provider Site Information"
  # The original pattern was a character class that split on every single
  # separator character, so "a, b" produced an empty nickname in between.
  requested = args.to_s.split(/[\s,]+/).reject { |nickname| nickname.empty? }
  requested = @conf.sites.keys if requested.empty?
  requested.each do |site|
    if @conf.sites.key?(site)
      print_site(site)
    else
      puts "#{site} doesn't appear to be configured, use list to see configured repositories."
    end
  end
  puts
end
118
+
119
# Runs the site dialog for a new provider and stores the result.
#
# Nothing is stored or saved when the dialog is aborted: form returns nil in
# that case, and assigning it blindly used to create a nil => nil entry in
# the site table.
def new
  banner "Define New Harvesting Site"
  name, site = form
  return unless name && site
  @conf.sites[name] = site
  @conf.save
end
125
+
126
# Runs the site dialog for the named provider and stores the result.
#
# name - nickname of the site to edit; passed to form, which uses the
#        current settings of that site as prompt defaults.
#
# Nothing is stored or saved when the dialog is aborted: form returns nil in
# that case, and assigning it blindly used to create a nil => nil entry in
# the site table.
def edit(name)
  banner "Edit Harvesting Site"
  name, site = form(name)
  return unless name && site
  @conf.sites[name] = site
  @conf.save
end
132
+
133
# Removes a provider site after asking for confirmation.
#
# site - nickname of the site to remove.
#
# An unknown nickname is reported instead of being "removed". Any answer
# other than Y/y, including end-of-input (nil from readline), leaves the
# configuration untouched.
def remove(site)
  unless @conf.sites.key?(site)
    puts "Unknown repository: '#{site}'"
    return
  end
  answer = readline("Remove #{site}? (Y/N): ")
  return unless answer && answer.strip.upcase == 'Y'
  @conf.sites.delete(site)
  @conf.save
  puts "#{site} removed"
end
140
+
141
+ # http://oai.getty.edu:80/oaicat/OAIHandler
142
# Interactive dialog that collects the settings of one provider site.
#
# name - nickname of an existing site to edit, or nil to ask for a new,
#        unused nickname.
#
# Returns [name, site] where site is a Hash with the keys 'url', 'prefix',
# 'set' and 'period', or nil when the dialog is aborted or fails. The stored
# settings are never modified here; the caller decides whether to keep the
# result.
def form(name = nil)
  unless name
    name = prompt("nickname")
    until name && !@conf.sites.key?(name)
      # The original called an undefined `show` helper at this point.
      puts "Nickname already in use, choose another." if name
      name = prompt("nickname")
    end
  end
  # Work on a copy so an aborted dialog leaves the configured site intact.
  site = (@conf.sites[name] || {}).dup

  # URL -- verify returns the (possibly redirected) address, or nil
  url = prompt("url", site['url'])
  until (verified_url = verify(url))
    puts "Trouble contacting provider, bad url?"
    url = prompt("url", site['url'])
  end
  site['url'] = verified_url

  # Metadata formats
  formats = metadata(site['url'])
  report "Repository supports [#{formats.join(', ')}] metadata formats."
  prefix = prompt("prefix", site['prefix'])
  prefix = prompt("prefix", site['prefix']) until formats.include?(prefix)
  site['prefix'] = prefix

  # Sets -- if the set list cannot be fetched, fall back to 'all' without
  # asking. Only the fetch is rescued, so aborting a prompt still aborts.
  available_sets = begin
    ['all'] + sets(site['url'])
  rescue StandardError
    nil
  end
  if available_sets
    site['set'] ||= 'all' # default to all sets
    report "Repository supports [#{available_sets.join(', ')}] metadata sets."
    set = prompt("set", site['set'])
    # Validate what was entered; the original re-checked the old value.
    set = prompt("set", site['set']) until available_sets.include?(set)
    site['set'] = set
  else
    site['set'] = 'all'
  end

  # Period -- offer the current period as default when editing
  default_period = site['period'] || "daily"
  period = expand_period(prompt("period", default_period))
  until Config::PERIODS.include?(period)
    puts "Must be daily, weekly, or monthly"
    period = expand_period(prompt("period", default_period))
  end
  site['period'] = period

  [name, site]
rescue StandardError => e
  puts "Problem adding/updating provider, aborting. (#{e})"
  nil
end
198
+
199
# Interactive dialog for the global harvester settings: storage directory,
# report email address(es), mail server and log directory.
#
# All answers are collected first and applied together, so an aborted dialog
# leaves the configuration unchanged. Returns nil when aborted.
def config
  directory = prompt("storage directory", @conf.storage)
  # The retry used the label "storage directory: ", which doubled the colon.
  directory = prompt("storage directory", @conf.storage) until directory_acceptable(directory)

  current_email = @conf.email.respond_to?(:join) ? @conf.email.join(', ') : nil
  emails = parse_emails(prompt("email", current_email))

  mail_server = prompt("mail server", @conf.mail_server)

  logfile = prompt("log file(s) directory", @conf.logfile)
  logfile = prompt("log file(s) directory", @conf.logfile) until directory_acceptable(logfile)

  @conf.email = emails
  @conf.mail_server = mail_server
  @conf.storage = directory
  @conf.logfile = logfile
  @conf.save
rescue StandardError => e
  # Previously swallowed silently; tell the user the dialog did not finish.
  puts "Configuration aborted. (#{e})"
  nil
end
222
+
223
# Prints "key: value" with the key right-aligned in a column.
#
# key   - label; only its +size+ and string form are used.
# value - value printed after the colon.
# split - column width the key is padded to on the left (default 40). Keys
#         at least that long are printed without padding.
def display(key, value, split = 40)
  gap = split - key.size
  print " " * gap if gap > 0
  puts "#{key}: #{value}"
end
227
+
228
# Prints a heading: a blank line, the text, and an underline of dashes as
# long as the text.
def banner(str)
  underline = "-" * str.size
  puts "\n#{str}"
  print underline
  puts "\n"
end
233
+
234
# Prints a message preceded by a blank line.
def report(str)
  message = "\n" + str.to_s + "\n"
  puts message
end
237
+
238
# Prints the given number of tab characters without a trailing newline.
def indent(number)
  tab = "\t"
  number.times { print tab }
end
243
+
244
# Asks the user for a value via readline.
#
# text    - label shown in the prompt.
# default - value shown in brackets and returned when the input is empty.
# split   - width the prompt is padded to on the left when it is shorter.
#
# Returns the entered text, or +default+ for an empty line.
# Raises RuntimeError ("Exit loop") when readline returns nil.
def prompt(text, default = nil, split = 20)
  label = "#{text} [#{default}]: "
  missing = split - label.size
  print " " * missing if missing > 0
  answer = readline(label, true)
  raise RuntimeError.new("Exit loop") unless answer
  answer.empty? ? default : answer
end
251
+
252
# Checks that an OAI provider answers an Identify request at the given URL.
#
# url            - base URL of the provider.
# redirects_left - how many "Permanently Redirected" hops may still be
#                  followed (default 5); guards against redirect loops,
#                  which previously recursed without bound.
#
# Returns the URL that answered (the redirect target when redirected), or
# nil when the provider could not be contacted.
def verify(url, redirects_left = 5)
  client = OAI::Client.new(url, :redirects => false)
  identify = client.identify
  puts "Repository name \"#{identify.repository_name}\""
  url
rescue StandardError => e
  # The redirect target is taken from the error text, without its query string.
  target = e.to_s[/^Permanently Redirected to \[(.*)\?.*\]/, 1]
  if target.nil?
    puts "Error selecting repository: #{e}"
    nil
  elsif redirects_left > 0
    report "Provider redirected to: #{target}"
    verify(target, redirects_left - 1)
  else
    puts "Error selecting repository: too many redirects"
    nil
  end
end
267
+
268
# Asks the provider at +url+ for its metadata formats.
#
# Returns an Array with the +prefix+ of every format in the
# list_metadata_formats response.
def metadata(url)
  listing = OAI::Client.new(url).list_metadata_formats
  listing.to_a.map { |entry| entry.prefix }
end
277
+
278
# Asks the provider at +url+ for its sets.
#
# Returns an Array with the +spec+ of every set in the list_sets response.
def sets(url)
  listing = OAI::Client.new(url).list_sets
  listing.to_a.map { |entry| entry.spec }
end
287
+
288
# Tells whether +dir+ names an existing, writable directory.
#
# Prints an explanation and returns false otherwise (also for nil).
#
# Uses File.directory? instead of File.exists?: the latter was removed in
# Ruby 3.2, and it also accepted regular files as directories.
def directory_acceptable(dir)
  return true if dir && File.directory?(dir) && File.writable?(dir)
  puts "Directory doesn't exist, or isn't writtable."
  false
end
295
+
296
# Expands a (possibly abbreviated) harvest period to its full name.
#
# str - user input such as "w", "Week" or "weekly".
#
# Returns the first entry of Config::PERIODS that starts with the input
# (ignoring case and surrounding whitespace), or nil when nothing matches or
# the input is blank.
#
# The input is compared as plain text. Interpolating it into a regular
# expression let "(" raise RegexpError and "d." match by accident.
def expand_period(str)
  wanted = str.to_s.strip.downcase
  return nil if wanted.empty?
  return wanted if Config::PERIODS.include?(wanted)
  Config::PERIODS.find { |period| period.start_with?(wanted) }
end
301
+
302
# Splits a list of email addresses typed on one line.
#
# emails - String with addresses separated by commas and/or whitespace, or
#          nil.
#
# Returns an Array of addresses, or nil when +emails+ is nil.
#
# Runs of separators count as one. The original character class split on
# every single separator, so "a, b" yielded an empty address in between.
def parse_emails(emails)
  return nil unless emails
  emails.split(/[\s,]+/).reject { |address| address.empty? }
end
306
+
307
# Prints the global settings, one per line, in a 20-character label column.
# The storage directory is always shown; email, mail server and log location
# only when they are set.
def list_config
  rows = [["storage directory", @conf.storage]]
  rows << ["email", @conf.email.join(', ')] if @conf.email
  rows << ["mail server", @conf.mail_server] if @conf.mail_server
  rows << ["log location", @conf.logfile] if @conf.logfile
  rows.each { |label, value| display(label, value, 20) }
end
313
+
314
# Prints a "Sites" banner followed by the settings of every configured site.
def list_sites
  banner "Sites"
  @conf.sites.each_key do |nickname|
    print_site(nickname)
  end
end
318
+
319
# Prints a site's nickname followed by each of its settings in a
# 15-character label column.
#
# site - nickname; looked up in the configured site table.
def print_site(site)
  puts site
  settings = @conf.sites[site]
  settings.each do |key, value|
    display(key, value, 15)
  end
end
323
+
324
# Prints instructions for scheduling the nightly harvest, using the name the
# program was started with ($0) in the sample entries.
def setup_cron
  banner "Scheduling Automatic Harvesting"
  instructions = [
    "To activate automatic harvesting you must add an entry to",
    "your scheduler. Linux/Mac OS X users should add the following",
    "entry to their crontabs:\n\n",
    "0 0 * * * #{$0} -D\n\n",
    "Windows users should use WinAt to schedule",
    "#{$0} to run every night.\n\n\n"
  ]
  puts instructions
end
333
+
334
+ end
335
+
336
+ end
337
+ end
338
+
@@ -0,0 +1,39 @@
1
+ require 'zlib'
2
+ require 'net/smtp'
3
+ require 'yaml'
4
+ require 'tempfile'
5
+ require 'logger'
6
+ require 'fileutils'
7
+ require 'ostruct'
8
+ require 'readline'
9
+ require 'chronic'
10
+ require 'socket'
11
+
12
+ require 'oai/client'
13
+ require 'oai/harvester/config'
14
+ require 'oai/harvester/harvest'
15
+ require 'oai/harvester/logging'
16
+ require 'oai/harvester/mailer'
17
+ require 'oai/harvester/shell'
18
+
19
# Collects the nicknames of all sites that are due for harvesting.
#
# conf - configuration whose +sites+ is nil or a Hash mapping nickname to a
#        settings Hash; the 'period' and 'last' entries are handed to
#        needs_updating.
#
# Returns an Array of nicknames (empty when there are no sites).
def harvestable_sites(conf)
  return [] unless conf.sites
  conf.sites.each_with_object([]) do |(nickname, settings), due|
    due << nickname if needs_updating(settings['period'], settings['last'])
  end
end
26
+
27
# Decides whether a site is due for another harvest.
#
# period - 'daily', 'weekly' or 'monthly'.
# last   - time of the previous harvest, or nil when never harvested.
#
# Returns true when +last+ is nil or lies further back than the threshold
# for the period (86000, 604000 and 2591000 seconds respectively); false
# otherwise, including for an unknown period.
def needs_updating(period, last)
  return true if last.nil?
  thresholds = { 'daily' => 86000, 'weekly' => 604000, 'monthly' => 2591000 }
  limit = thresholds[period]
  return false unless limit
  Time.now - last > limit
end
39
+
@@ -0,0 +1,29 @@
1
module OAI::Provider::Metadata
  # = OAI::Metadata::DublinCore
  #
  # Format definition for the oai_dc metadata prefix: the fifteen Dublin Core
  # elements, all emitted under the 'dc' element namespace.
  class DublinCore < Format

    # Sets the prefix, schema location, namespace and field list.
    def initialize
      @prefix = "oai_dc"
      @element_namespace = "dc"
      @namespace = "http://www.openarchives.org/OAI/2.0/oai_dc/"
      @schema = "http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
      @fields = [
        :title, :creator, :subject, :description, :publisher,
        :contributor, :date, :type, :format, :identifier,
        :source, :language, :relation, :coverage, :rights
      ]
    end

    # Attributes placed on the root element of an encoded record: the
    # namespace declarations and the schema location.
    def header_specification
      schema_location = "http://www.openarchives.org/OAI/2.0/oai_dc/\n" +
                        "http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
      {
        'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' => schema_location
      }
    end

  end
end
@@ -0,0 +1,38 @@
1
module OAI::Provider::Metadata
  # = OAI::Metadata::Europeana
  #
  # Simple implementation of the Europeana metadata format. The fields are
  # grouped by the namespace prefix they are emitted under (dc, dcterms, ese).
  class Europeana < Format

    # Sets the prefix, schema location, namespace and grouped field lists.
    def initialize
      @prefix = 'ese'
      # NOTE(review): schema and namespace carry the oai_dc values --
      # presumably copied from DublinCore; confirm whether ESE-specific URIs
      # are intended here.
      @schema = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
      @namespace = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
      @element_namespace = 'ese'
      @fields = {
        :dc => [:title, :creator, :subject, :description, :publisher,
                :contributor, :date, :type, :format, :identifier,
                :source, :language, :relation, :coverage, :rights],
        # An Array like the other groups: a bare Symbol has no to_a/each,
        # which Format#encode calls on every group.
        :dcterms => [:provenance],
        :ese => [:userTag, :unstored, :object, :language, :provider,
                 :type, :uri, :year, :hasObject, :country]
      }
    end

    # Attributes placed on the root element of an encoded record: the
    # namespace declarations and the schema location.
    def header_specification
      {
        'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        'xmlns:ese' => 'http://europeana.eu/terms/',
        # The dc prefix must name the Dublin Core element set (as it does in
        # the oai_dc format and in the schemaLocation below); it was bound
        # to the dcterms namespace URI.
        'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
        'xmlns:dcterms' => 'http://purl.org/dc/terms/',
        'xmlns:europeana' => 'http://europeana.eu/terms/',
        'xmlns:local' => 'http://metadata.cerl.org/oai/namespace/local/',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' =>
          "http://purl.org/dc/elements/1.1/\n" +
          "http://dublincore.org/schemas/xmls/qdc/dc.xsd http://purl.org/dc/terms/\n" +
          "http://dublincore.org/schemas/xmls/qdc/dcterms.xsd"
      }
    end

  end
end
@@ -0,0 +1,143 @@
1
+ require 'singleton'
2
+
3
+ module OAI::Provider::Metadata
4
+ # == Metadata Base Class
5
+ #
6
+ # MetadataFormat is the base class from which all other format classes
7
+ # should inherit. Format classes provide mapping of record fields into XML.
8
+ #
9
+ # * prefix - contains the metadata_prefix used to select the format
10
+ # * schema - location of the xml schema
11
+ # * namespace - location of the namespace document
12
+ # * element_namespace - the namespace portion of the XML elements
13
+ # * fields - list of fields in this metadata format
14
+ #
15
+ # See OAI::Metadata::DublinCore for an example
16
+ #
17
+ class Format
18
+ include Singleton
19
+
20
+ attr_accessor :prefix, :schema, :namespace, :element_namespace, :fields
21
+
22
+ # Provided a model, and a record belonging to that model this method
23
+ # will return an xml represention of the record. This is the method
24
+ # that should be extended if you need to create more complex xml
25
+ # representations.
26
# Returns the XML representation of +record+ in this metadata format.
#
# model  - the model the record belongs to; when it responds to
#          "map_<prefix>", that mapping is passed on to value_for.
# record - the record to encode; when it responds to "to_<prefix>", the
#          result of that method is returned as is.
#
# +fields+ may be a flat list (every element is emitted under
# element_namespace) or a Hash of namespace prefix => field or list of
# fields. A group holding a single field need not be wrapped in an Array.
def encode(model, record)
  return record.send("to_#{prefix}") if record.respond_to?("to_#{prefix}")

  xml = Builder::XmlMarkup.new
  map = model.respond_to?("map_#{prefix}") ? model.send("map_#{prefix}") : {}
  # Treat a flat list as one group under the format's own element namespace,
  # so both shapes share a single emit loop.
  groups = fields.is_a?(Hash) ? fields : { element_namespace => fields }
  xml.tag!("#{prefix}:#{element_namespace}", header_specification) do
    groups.each_pair do |ns_prefix, names|
      # Array() instead of to_a: a bare Symbol has no to_a.
      Array(names).each do |field|
        values = value_for(field, record, map)
        # A scalar (including nil) becomes a single element.
        values = [values] unless values.respond_to?(:each)
        values.each { |value| xml.tag! "#{ns_prefix}:#{field}", value }
      end
    end
  end
  xml.target!
end
62
+
63
+ private
64
+
65
+ # We try a bunch of different methods to get the data from the model.
66
+ #
67
+ # 1. Check if the model defines a field mapping for the field of
68
+ # interest.
69
+ # 2. Try calling the pluralized name method on the model.
70
+ # 3. Try calling the singular name method on the model
71
# Looks up the value(s) of +field+ on +record+.
#
# field  - field name from the format's field list.
# record - object queried via respond_to?/send.
# map    - field => method name overrides supplied by the model.
#
# Returns the result of the pluralized accessor if the record has one, else
# of the singular accessor, else an empty Array.
def value_for(field, record, map)
  accessor = (map[field] || field).to_s
  plural = pluralize(accessor)

  if record.respond_to?(plural)
    record.send(plural)
  elsif record.respond_to?(accessor)
    # Calling an accessor named `type` can emit a deprecation warning,
    # hence the silencing.
    silence_warnings { record.send(accessor) }
  else
    []
  end
end
84
+
85
+ # Subclasses must override
86
# Placeholder for the root element attributes of an encoded record.
#
# Raises NotImplementedError; subclasses supply the real attribute Hash.
def header_specification
  raise NotImplementedError
end
89
+
90
+ # Shamelessly lifted from ActiveSupport. Thanks Rails community!
91
# Returns the plural form of +word+.
#
# Delegates to word.pluralize when available (e.g. with ActiveSupport
# loaded); otherwise applies a small built-in rule set: uncountable words
# are returned unchanged, a few irregular words are looked up, and the first
# matching entry of +rules+ is applied.
def pluralize(word)
  # Use ActiveSupport's pluralization if it's available.
  return word.pluralize if word.respond_to?(:pluralize)

  # Otherwise use our own simple pluralization rules.
  result = word.to_s.dup

  # Uncountable words
  return result if %w(equipment information rice money species series fish sheep).include?(result)

  # Irregular words
  irregular = { 'person' => 'people', 'man' => 'men', 'child' => 'children', 'sex' => 'sexes',
                'move' => 'moves', 'cow' => 'kine' }[result]
  return irregular if irregular

  # The rule table lists the catch-all /$/ => 's' first and the specific
  # rules after it, so it has to be searched from the end. Going front to
  # back, the catch-all always matched and no specific rule was ever
  # reached ("country" became "countrys", "rights" became "rightss").
  rules.reverse_each { |(rule, replacement)| break if result.sub!(rule, replacement) }
  result
end
108
+
109
# Pluralization rule table used by pluralize.
#
# Returns an Array of [pattern, replacement] pairs in a fixed order, starting
# with the catch-all "append s" rule and followed by more specific endings.
def rules
  {
    /$/ => 's',
    /s$/i => 's',
    /(ax|test)is$/i => '\1es',
    /(octop|vir)us$/i => '\1i',
    /(alias|status)$/i => '\1es',
    /(bu)s$/i => '\1ses',
    /(buffal|tomat)o$/i => '\1oes',
    /([ti])um$/i => '\1a',
    /sis$/i => 'ses',
    /(?:([^f])fe|([lr])f)$/i => '\1\2ves',
    /(hive)$/i => '\1s',
    /([^aeiouy]|qu)y$/i => '\1ies',
    /(x|ch|ss|sh)$/i => '\1es',
    /(matr|vert|ind)(?:ix|ex)$/i => '\1ices',
    /([m|l])ouse$/i => '\1ice',
    /^(ox)$/i => '\1en',
    /(quiz)$/i => '\1zes'
  }.to_a
end
130
+
131
# Runs the given block with $VERBOSE set to nil and restores the previous
# value afterwards, also when the block raises.
#
# Returns the value of the block.
def silence_warnings
  previous = $VERBOSE
  $VERBOSE = nil
  yield
ensure
  $VERBOSE = previous
end
137
+
138
+
139
+ end
140
+
141
+ end
142
+
143
+ Dir.glob(File.dirname(__FILE__) + '/metadata_format/*.rb').each {|lib| require lib}