traject 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. data/.gitignore +18 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +346 -0
  5. data/Rakefile +16 -0
  6. data/bin/traject +153 -0
  7. data/doc/macros.md +103 -0
  8. data/doc/settings.md +34 -0
  9. data/lib/traject.rb +10 -0
  10. data/lib/traject/indexer.rb +196 -0
  11. data/lib/traject/json_writer.rb +51 -0
  12. data/lib/traject/macros/basic.rb +9 -0
  13. data/lib/traject/macros/marc21.rb +145 -0
  14. data/lib/traject/marc_extractor.rb +206 -0
  15. data/lib/traject/marc_reader.rb +61 -0
  16. data/lib/traject/qualified_const_get.rb +30 -0
  17. data/lib/traject/solrj_writer.rb +120 -0
  18. data/lib/traject/translation_map.rb +184 -0
  19. data/lib/traject/version.rb +3 -0
  20. data/test/indexer/macros_marc21_test.rb +146 -0
  21. data/test/indexer/macros_test.rb +40 -0
  22. data/test/indexer/map_record_test.rb +120 -0
  23. data/test/indexer/read_write_test.rb +47 -0
  24. data/test/indexer/settings_test.rb +65 -0
  25. data/test/marc_extractor_test.rb +168 -0
  26. data/test/marc_reader_test.rb +29 -0
  27. data/test/solrj_writer_test.rb +106 -0
  28. data/test/test_helper.rb +28 -0
  29. data/test/test_support/hebrew880s.marc +1 -0
  30. data/test/test_support/manufacturing_consent.marc +1 -0
  31. data/test/test_support/test_data.utf8.marc.xml +2609 -0
  32. data/test/test_support/test_data.utf8.mrc +1 -0
  33. data/test/translation_map_test.rb +98 -0
  34. data/test/translation_maps/bad_ruby.rb +8 -0
  35. data/test/translation_maps/bad_yaml.yaml +1 -0
  36. data/test/translation_maps/both_map.rb +1 -0
  37. data/test/translation_maps/both_map.yaml +1 -0
  38. data/test/translation_maps/default_literal.rb +10 -0
  39. data/test/translation_maps/default_passthrough.rb +10 -0
  40. data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
  41. data/test/translation_maps/ruby_map.rb +10 -0
  42. data/test/translation_maps/translate_array_test.yaml +8 -0
  43. data/test/translation_maps/yaml_map.yaml +7 -0
  44. data/traject.gemspec +30 -0
  45. data/vendor/solrj/README +8 -0
  46. data/vendor/solrj/build.xml +39 -0
  47. data/vendor/solrj/ivy.xml +16 -0
  48. data/vendor/solrj/lib/commons-codec-1.7.jar +0 -0
  49. data/vendor/solrj/lib/commons-io-2.1.jar +0 -0
  50. data/vendor/solrj/lib/httpclient-4.2.3.jar +0 -0
  51. data/vendor/solrj/lib/httpcore-4.2.2.jar +0 -0
  52. data/vendor/solrj/lib/httpmime-4.2.3.jar +0 -0
  53. data/vendor/solrj/lib/jcl-over-slf4j-1.6.6.jar +0 -0
  54. data/vendor/solrj/lib/jul-to-slf4j-1.6.6.jar +0 -0
  55. data/vendor/solrj/lib/log4j-1.2.16.jar +0 -0
  56. data/vendor/solrj/lib/noggit-0.5.jar +0 -0
  57. data/vendor/solrj/lib/slf4j-api-1.6.6.jar +0 -0
  58. data/vendor/solrj/lib/slf4j-log4j12-1.6.6.jar +0 -0
  59. data/vendor/solrj/lib/solr-solrj-4.3.1-javadoc.jar +0 -0
  60. data/vendor/solrj/lib/solr-solrj-4.3.1-sources.jar +0 -0
  61. data/vendor/solrj/lib/solr-solrj-4.3.1.jar +0 -0
  62. data/vendor/solrj/lib/wstx-asl-3.2.7.jar +0 -0
  63. data/vendor/solrj/lib/zookeeper-3.4.5.jar +0 -0
  64. metadata +264 -0
@@ -0,0 +1,206 @@
1
+
2
+
3
module Traject
  # MarcExtractor extracts lists of strings from a MARC::Record according to a
  # specification. See .parse_string_spec for the string syntax used to say
  # what should be extracted, and #initialize for the options that control
  # extraction.
  #
  # Examples:
  #
  #     array_of_stuff = MarcExtractor.extract_by_spec(marc_record, "001:245abc:700a")
  #     values         = MarcExtractor.extract_by_spec(marc_record, "040a", :seperator => nil)
  #
  # Note that the option key really is spelled :seperator (sic).
  class MarcExtractor
    attr_accessor :options, :marc_record, :spec_hash

    # Convenience method: build a MarcExtractor and run #extract on it.
    #
    # marc_record::   the record to extract from; must not be nil.
    # specification:: either a String (handed to .parse_string_spec) or a Hash
    #                 in the shape returned by .parse_string_spec.
    # options::       optional Hash, passed through as the third argument of
    #                 MarcExtractor.new.
    #
    # Returns the array of extracted strings.
    # Raises ArgumentError if marc_record is nil, or if a String specification
    # can not be parsed.
    def self.extract_by_spec(marc_record, specification, options = {})
      raise ArgumentError, "first argument must not be nil" if marc_record.nil?

      unless specification.kind_of? Hash
        specification = self.parse_string_spec(specification)
      end

      Traject::MarcExtractor.new(marc_record, specification, options).extract
    end

    # marc_record:: the record to extract from.
    # spec_hash::   a Hash in the shape returned by .parse_string_spec
    #               (a String is NOT accepted here, see .extract_by_spec).
    # options::
    #
    # [:seperator]        default ' ' (space). String used to join the subfield
    #                     values of one field into a single string. If nil,
    #                     each subfield value is returned as its own string.
    #
    # [:alternate_script] default :include. Controls 880 fields, which are
    #                     matched against the spec of the tag named in the
    #                     first three bytes of their subfield 6:
    #                     * :include => both ordinary fields and such 880s
    #                     * false    => 880s get no special handling
    #                     * :only    => only such 880s, not the ordinary fields
    #
    # Raises ArgumentError if spec_hash is not a Hash.
    def initialize(marc_record, spec_hash, options = {})
      self.options = {
        :seperator        => ' ',
        :alternate_script => :include
      }.merge(options)

      unless spec_hash.kind_of? Hash
        raise ArgumentError, "second arg to MarcExtractor.new must be a Hash specification object"
      end

      self.marc_record = marc_record
      self.spec_hash   = spec_hash
    end

    # Converts a string marc spec like "245abc:700a" into the nested hash used
    # internally to represent the specification.
    #
    # A String specification is a colon-separated list of parts, each of form:
    #     {tag}{|indicators|}{subfields}
    # * tag is three chars (usually but not necessarily numeric),
    # * indicators are optional: exactly two chars enclosed in pipes. Either
    #   char may be * meaning "don't care"; use a space to mean 'blank',
    # * subfields are an optional list of chars (lowercase letters or digits).
    #
    #     "245|01|abc65:345abc:700|*5|:800"
    #
    # Or, for control (fixed) fields, a part can instead carry a byte slice
    # specification (but then NO subfields or indicators):
    #
    #     "008[35-37]:005[5]"
    #     => bytes 35-37 inclusive of field 008, and byte 5 of field 005.
    #
    # A tag such as "LDR" is accepted by the parser, but extraction only
    # iterates over the record's fields (leader support is still a TODO).
    #
    # Returns a nested hash keyed by tag:
    #     { tag => {
    #         :subfields  => ['a', 'b', '2'], # Array of codes; absent if none given
    #         :indicators => ['1', nil],      # two elements, nil means "don't care"; absent if none given
    #         :bytes      => 12               # or a Range such as (7..10); byte-slice parts only
    #       }
    #     }
    #
    # Raises ArgumentError for a part that matches neither form.
    def self.parse_string_spec(spec_string)
      hash = {}

      spec_string.split(":").each do |part|
        if (part =~ /\A([a-zA-Z0-9]{3})(\|([a-z0-9\ \*]{2})\|)?([a-z0-9]*)?\Z/)
          # variable field
          tag, indicators, subfields = $1, $3, $4

          hash[tag] ||= {}

          if subfields
            # An empty subfield string iterates zero times, leaving :subfields unset.
            subfields.each_char do |subfield|
              hash[tag][:subfields] ||= Array.new
              hash[tag][:subfields] << subfield
            end
          end
          if indicators
            # "*" is stored as nil, which #matches_indicators treats as a wildcard.
            hash[tag][:indicators] = [ (indicators[0] if indicators[0] != "*"), (indicators[1] if indicators[1] != "*") ]
          end
        elsif (part =~ /\A([a-zA-Z0-9]{3})(\[(\d+)(-(\d+))?\])\Z/) # "005[4-5]"
          tag, byte1, byte2 = $1, $3, $5
          hash[tag] ||= {}

          if byte1 && byte2
            hash[tag][:bytes] = ((byte1.to_i)..(byte2.to_i))
          elsif byte1
            hash[tag][:bytes] = byte1.to_i
          end
        else
          raise ArgumentError.new("Unrecognized marc extract specification: #{part}")
        end
      end

      return hash
    end

    # Returns an array of strings: the values extracted from every field of
    # the record that matches the specification, in record order.
    def extract
      results = []

      self.each_matching_line do |field, spec|
        if control_field?(field)
          # Control fields have a single value, optionally byte-sliced.
          results << (spec[:bytes] ? field.value.byteslice(spec[:bytes]) : field.value)
        else
          results.concat collect_subfields(field, spec)
        end
      end

      return results
    end

    # Yields (field, spec) for every field in the source record that is
    # covered by the specification and whose indicators match. field is the
    # object yielded by marc_record.each; spec is the per-tag hash it matched.
    # The :alternate_script option is taken into account via
    # #spec_covering_field.
    def each_matching_line
      self.marc_record.each do |field|
        if (spec = spec_covering_field(field)) && matches_indicators(field, spec)
          yield(field, spec)
        end
      end
    end

    # Given a marc data field and its per-tag spec hash, returns an ARRAY of
    # strings. Only subfields whose code is listed in spec[:subfields] are
    # used (all subfields when spec[:subfields] is nil). With a non-nil
    # :seperator option the values are joined into a single-element array,
    # otherwise each value is its own element.
    def collect_subfields(field, spec)
      subfields = field.subfields.collect do |subfield|
        subfield.value if spec[:subfields].nil? || spec[:subfields].include?(subfield.code)
      end.compact

      return options[:seperator] ? [ subfields.join( options[:seperator]) ] : subfields
    end

    # Returns the per-tag spec hash that covers this field, or nil if none.
    #
    # * An 880 field, unless :alternate_script is false, is looked up under
    #   the tag found in the first three bytes of its subfield 6. An 880
    #   without a subfield 6 is not covered (nil).
    # * Any other field (or an 880 when :alternate_script is false) is looked
    #   up under its own tag, unless :alternate_script is :only, in which
    #   case nil is returned.
    def spec_covering_field(field)
      if field.tag == "880" && options[:alternate_script] != false
        linkage = field["6"]
        # Check for a missing subfield 6 BEFORE touching it, so a malformed
        # 880 is skipped instead of raising NoMethodError on nil.
        return nil if linkage.nil?

        # Due to bug in jruby https://github.com/jruby/jruby/issues/886 , we need
        # to do this weird encode gymnastics, which fixes it for mysterious reasons.
        orig_tag = linkage.encode(linkage.encoding).byteslice(0, 3)
        self.spec_hash[orig_tag]
      elsif options[:alternate_script] != :only
        self.spec_hash[field.tag]
      end
    end

    # True if field is a MARC::ControlField.
    def control_field?(field)
      # should the MARC gem have a more efficient way to do this,
      # define #control_field? on both ControlField and DataField?
      return field.kind_of? MARC::ControlField
    end

    # Takes a marc field and an individual spec hash
    # ({:subfields => array, :indicators => array}). Returns true when the
    # spec has no :indicators, or when each non-nil spec indicator equals the
    # corresponding indicator of the field.
    def matches_indicators(field, spec)
      return true if spec[:indicators].nil?

      return (spec[:indicators][0].nil? || spec[:indicators][0] == field.indicator1) &&
        (spec[:indicators][1].nil? || spec[:indicators][1] == field.indicator2)
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require 'marc'
2
+
3
+ # A Reader class that can be used with Traject::Indexer.reader, to read
4
+ # MARC records.
5
+ #
6
+ # Includes Enumerable for convenience.
7
+ #
8
+ # Reads in Marc records using ruby marc. Depends on config variables to
9
+ # determine what serialization type to expect, and other parameters controlling
10
+ # de-serialization.
11
+ #
12
+ # Settings:
13
+ # ["marc_source.type"] serialization type. default 'binary'
14
+ # * "binary". Actual marc.
15
+ # * "xml", MarcXML
16
+ # * "json". (NOT YET IMPLEMENTED) The "marc-in-json" format, encoded as newline-separated
17
+ # json. A simplistic newline-separated json, with no comments
18
+ # allowed, and no unescaped internal newlines allowed in the json
19
+ # objects -- we just read line by line, and assume each line is a
20
+ # marc-in-json. http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
21
+ # ["marc_source.xml_parser"] For XML type, which XML parser to tell Marc::Reader
22
+ # to use. Anything recognized by Marc::Reader :parser
23
+ # argument. By default, asks Marc::Reader to take
24
+ # its best guess as to highest performance available
25
+ # installed option.
26
+ #
27
+ #
28
+ # Can NOT yet read Marc8, input is always assumed UTF8.
29
class Traject::MarcReader
  include Enumerable

  attr_reader :settings, :input_stream

  # Looked up once, when this class body is evaluated, and used as the
  # XML parser whenever settings do not name one explicitly.
  @@best_xml_parser = MARC::XMLReader.best_available

  # input_stream:: handed unchanged to the underlying ruby-marc reader.
  # settings::     hash-like object; "marc_source.type" and
  #                "marc_source.xml_parser" are the keys read here.
  def initialize(input_stream, settings)
    @input_stream = input_stream
    @settings     = settings
  end

  # Lazily builds (and then memoizes) the ruby-marc reader that matches
  # settings["marc_source.type"]: a MARC::XMLReader for "xml", otherwise
  # a MARC::Reader.
  def internal_reader
    return @internal_reader if defined? @internal_reader

    @internal_reader =
      if "xml" == settings["marc_source.type"]
        xml_parser = settings["marc_source.xml_parser"] || @@best_xml_parser
        MARC::XMLReader.new(self.input_stream, :parser => xml_parser)
      else
        MARC::Reader.new(self.input_stream)
      end
  end

  # Delegates to the internal reader's #each; this is what makes the
  # Enumerable mix-in work.
  def each(*args, &block)
    self.internal_reader.each(*args, &block)
  end
end
@@ -0,0 +1,30 @@
1
+ # From http://redcorundum.blogspot.com/2006/05/kernelqualifiedconstget.html
2
+ # Adapted into a module, rather than monkey patching it into Kernel
3
+ #
4
+ # Method to take a string constant name, including :: qualifications, and
5
+ # look up the actual constant. Looks up relative to current file.
6
+ # Respects leading ::. Etc.
7
module Traject::QualifiedConstGet
  # Resolves a (possibly ::-qualified) constant name given as a string.
  #
  # A name with a leading "::" is resolved from Object only. Otherwise the
  # lookup starts in the namespace of the receiver (the receiver itself if
  # it is a Class or Module, else its class) and walks outward one
  # namespace segment at a time, finally falling back to Object.
  #
  # Raises NameError if the constant can not be found anywhere.
  def qualified_const_get(str)
    segments = str.to_s.split('::')

    if segments[0].empty?
      # Leading "::" -- absolute name, no enclosing scopes to try.
      segments = segments[1..-1]
      scopes = []
    else
      origin = ((Class === self)||(Module === self)) ? self : self.class
      scopes = origin.to_s.split('::')
    end

    resolve = lambda do |names|
      names.inject(Object) { |ns, name| ns.const_get(name) }
    end

    # Try the innermost enclosing scope first, dropping one segment per miss.
    until scopes.empty?
      begin
        return resolve.call(scopes + segments)
      rescue NameError
        scopes.pop
      end
    end

    resolve.call(segments)
  end
end
@@ -0,0 +1,120 @@
1
+ require 'traject'
2
+ require 'traject/qualified_const_get'
3
+
4
+ #
5
+ # Writes to a Solr using SolrJ, and the SolrJ HttpSolrServer.
6
+ # (sub-class later for the ConcurrentUpdate server?)
7
+ #
8
+ # settings:
9
+ # [solr.url] Your solr url (required)
10
+ # [solrj_writer.server_class_name] Defaults to "HttpSolrServer". You can specify
11
+ # another Solr Server sub-class, but it has
12
+ # to take a one-arg url constructor. Maybe
13
+ # subclass this writer class and overwrite
14
+ # instantiate_solr_server! otherwise
15
+ # [solrj.jar_dir] Custom directory containing all of the SolrJ jars. All
16
+ # jars in this dir will be loaded. Otherwise,
17
+ # we load our own packaged solrj jars. This setting
18
+ # can't really be used differently in the same app instance,
19
+ # since jars are loaded globally.
20
+ # [solrj_writer.parser_class_name] A String name of a class in package
21
+ # org.apache.solr.client.solrj.impl,
22
+ # we'll instantiate one with a zero-arg
23
+ # constructor, and pass it as an arg to setParser on
24
+ # the SolrServer instance, if present.
25
+ # NOTE: For contacting a Solr 1.x server, with the
26
+ # recent version of SolrJ used by default, set to
27
+ # "XMLResponseParser"
28
+ # [solrj_writer.commit_on_close] If true (or string 'true'), send a commit to solr
29
+ # at end of #process.
30
class Traject::SolrJWriter
  include Traject::QualifiedConstGet

  attr_reader :settings

  # Validates settings, makes sure the SolrJ java classes are loaded, and
  # eagerly instantiates the solr server object.
  #
  # Raises ArgumentError if settings has no (non-nil) "solr.url".
  def initialize(argSettings)
    @settings = argSettings
    settings_check!(@settings)

    ensure_solrj_loaded!

    solr_server # init
  end

  # Loads solrj if not already loaded. On a failed java_import, every jar in
  # settings["solrj.jar_dir"] (or, if unset, the vendor/solrj/lib directory
  # resolved relative to this file) is required and the import is retried
  # once. Raises LoadError if the import still fails after that.
  def ensure_solrj_loaded!
    return if defined?(HttpSolrServer) && defined?(SolrInputDocument)

    require 'java'

    attempts = 0
    begin
      attempts += 1
      java_import org.apache.solr.client.solrj.impl.HttpSolrServer
      java_import org.apache.solr.common.SolrInputDocument
    rescue NameError
      bundled_jar_dir = File.expand_path("../../vendor/solrj/lib", File.dirname(__FILE__))
      jar_source = settings["solrj.jar_dir"] || bundled_jar_dir

      Dir.glob("#{jar_source}/*.jar") { |jar| require jar }

      raise LoadError.new("Can not find SolrJ java classes") if attempts > 1
      retry
    end
  end

  # Takes a hash of field name => array of values, builds a
  # SolrInputDocument with one addField call per value, and adds the
  # document to the solr server.
  def put(hash)
    document = SolrInputDocument.new

    hash.each_pair do |field_name, values|
      values.each { |value| document.addField( field_name, value ) }
    end

    # TODO: Buffer docs internally, add in arrays, one http
    # transaction per array. Is what solrj wiki recommends.
    solr_server.add(document)
  end

  # Commits first if settings["solrj_writer.commit_on_close"] stringifies to
  # "true", then shuts the server object down and forgets it.
  def close
    commit_requested = (settings["solrj_writer.commit_on_close"].to_s == "true")
    solr_server.commit if commit_requested

    solr_server.shutdown
    @solr_server = nil
  end

  # The memoized solr server object, instantiated on first use.
  def solr_server
    @solr_server = instantiate_solr_server! unless @solr_server
    @solr_server
  end
  attr_writer :solr_server # mainly for testing

  # Instantiates a solr server of class settings["solrj_writer.server_class_name"] or "HttpSolrServer"
  # and initializes it with settings["solr.url"]. If
  # settings["solrj_writer.parser_class_name"] is set, a parser of that
  # class (from org.apache.solr.client.solrj.impl) is created with a
  # zero-arg constructor and passed to setParser.
  def instantiate_solr_server!
    class_name   = settings["solrj_writer.server_class_name"] || "HttpSolrServer"
    server_class = qualified_const_get( class_name )
    server       = server_class.new( settings["solr.url"].to_s )

    parser_name = settings["solrj_writer.parser_class_name"]
    if parser_name
      parser = org.apache.solr.client.solrj.impl.const_get(parser_name).new
      server.setParser( parser )
    end

    server
  end

  # Raises ArgumentError unless settings carries a non-nil "solr.url".
  def settings_check!(settings)
    if !settings.has_key?("solr.url") || settings["solr.url"].nil?
      raise ArgumentError.new("SolrJWriter requires a 'solr.url' solr url in settings")
    end
  end

end
@@ -0,0 +1,184 @@
1
+ require 'traject'
2
+
3
+ require 'yaml'
4
+
5
+
6
module Traject
  # A TranslationMap is basically just something that has a hash-like #[]
  # method to map from input strings to output strings:
  #
  #     translation_map["some_input"] #=> some_output
  #
  # Input is assumed to always be string, output is either string
  # or array of strings.
  #
  # What makes it more useful than a stunted hash is its ability to load
  # the hash definitions from configuration files, either pure ruby or
  # yaml.
  #
  #     TranslationMap.new("dir/some_file")
  #
  # Will look through the ruby $LOAD_PATH, in order, for a translation_maps
  # subdir that contains either some_file.rb OR some_file.yaml. The first
  # load path entry that has either one wins (.rb is preferred over .yaml
  # within the same entry).
  # * Looks for "/translation_maps" subdir in load paths, so
  #   for instance you can have a gem that keeps translation maps
  #   in ./lib/translation_maps, and it Just Works.
  # * Note you do NOT supply the ".rb" or ".yaml" suffix yourself,
  #   it'll use whichever it finds (allows calling code to not care which is used).
  #
  # Ruby files just need to have their last line eval to a hash. The file
  # will be run through `eval`, don't do it with untrusted content (naturally)
  #
  # You can also pass in a Hash for consistency to TranslationMap.new, although
  # I don't know why you'd want to.
  #
  # == Special default handling
  #
  # The key "__default__" in the hash is treated specially. If set to a string,
  # that string will be returned by the TranslationMap for any input not otherwise
  # included. If set to the special string "__passthrough__", then for input not
  # mapped, the original input string will be returned.
  #
  # This is most useful for YAML definition files, if you are using an actual ruby
  # hash, you could just set the hash to do what you want using Hash#default_proc
  # etc.
  #
  # Or, when calling TranslationMap.new(), you can pass in options over-riding special
  # key too:
  #
  #     TranslationMap.new("something", :default => "foo")
  #     TranslationMap.new("something", :default => :passthrough)
  #
  # == Output: String or array of strings
  #
  # The output can be a string or an array of strings, or nil. It should not be
  # anything else. When used with the #translate_array! method, one string can be
  # replaced by multiple values (array of strings) or removed (nil)
  #
  # == Caching
  # Lookup and loading of configuration files will be cached, for efficiency.
  # You can reset with `TranslationMap.reset_cache!`
  #
  # == YAML example:
  #
  #     key: value
  #     key2: value2 multiple words fine
  #     key2b: "Although you can use quotes if you want: Or need."
  #     key3:
  #       - array
  #       - of
  #       - values look like this
  class TranslationMap
    # Memoizing loader for translation map definition files.
    class Cache
      def initialize
        @cached = Hash.new
      end

      # Returns an actual Hash -- or nil if none found. The result (including
      # a nil "not found" result) is remembered per path until #reset_cache!.
      def lookup(path)
        unless @cached.has_key?(path)
          @cached[path] = _lookup!(path)
        end
        return @cached[path]
      end

      # force lookup, without using cache.
      # used by cache. Returns the actual hash.
      # Returns nil if none found.
      # May raise on syntax error in file being loaded.
      def _lookup!(path)
        $LOAD_PATH.each do |base|
          rb_file   = File.join( base, "translation_maps", "#{path}.rb" )
          yaml_file = File.join( base, "translation_maps", "#{path}.yaml" )

          # First hit in load path order wins, for .rb and .yaml alike.
          if File.exist? rb_file
            # NOTE: evaluates arbitrary ruby; only safe for trusted files.
            # File.read closes the file handle, unlike File.open(...).read.
            return eval( File.read(rb_file), binding, rb_file )
          elsif File.exist? yaml_file
            return YAML.load_file(yaml_file)
          end
        end

        return nil
      end

      # Forget everything looked up so far.
      def reset_cache!
        @cached.clear
      end

    end

    attr_reader :hash
    attr_reader :default

    class << self
      attr_accessor :cache
      def reset_cache!
        cache.reset_cache!
      end
    end
    self.cache = Cache.new


    # defn::    either a Hash, or a path (without suffix) to look up under a
    #           "translation_maps" subdir of the load path.
    # options:: :default => a default value overriding any "__default__" key
    #           in the definition. Use :passthrough (or "__passthrough__")
    #           to get unmapped input returned as-is.
    #
    # Raises NotFound if defn is a path for which no definition file exists.
    def initialize(defn, options = {})
      if defn.kind_of? Hash
        @hash = defn
      else
        @hash = self.class.cache.lookup(defn)
        raise NotFound.new(defn) if @hash.nil?
      end

      if options[:default]
        @default = options[:default]
      elsif @hash.has_key? "__default__"
        # Work on a copy: the hash may be shared through the cache (or belong
        # to the caller), and removing the key from the shared object would
        # make every later TranslationMap for the same path lose its default.
        @hash = @hash.dup
        @default = @hash.delete("__default__")
      end
    end

    # Maps key to its output. For a key not present in the hash: returns the
    # key itself when the default is "__passthrough__" / :passthrough,
    # returns the default when another default is set, and otherwise
    # whatever the underlying hash returns (normally nil).
    def [](key)
      if self.default && (! @hash.has_key?(key))
        if self.default == "__passthrough__" || self.default == :passthrough
          return key
        else
          return self.default
        end
      end

      @hash[key]
    end
    alias_method :map, :[]

    # Run every element of an array through this translation map,
    # return the resulting array. If translation map returns nil,
    # original element will be missing from output.
    #
    # If an input maps to an array, each element of the array will be flattened
    # into the output.
    #
    # If an input maps to nil, it will cause the input element to be removed
    # entirely.
    def translate_array(array)
      array.each_with_object([]) do |input_element, output_array|
        output_element = self.map(input_element)
        if output_element.kind_of? Array
          output_array.concat output_element
        elsif ! output_element.nil?
          output_array << output_element
        end
      end
    end

    # Like #translate_array, but replaces the contents of the passed-in
    # array in place (and returns it).
    def translate_array!(array)
      array.replace( self.translate_array(array))
    end

    # Raised by TranslationMap.new when no definition file can be found.
    # NOTE(review): inherits from Exception rather than StandardError, so a
    # bare `rescue` will not catch it; left as-is to keep the existing contract.
    class NotFound < Exception
      def initialize(path)
        super("No translation map definition file found at '#{path}[.rb|.yaml]' in load path")
      end
    end

  end
end