traject 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.gitignore +18 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +346 -0
  5. data/Rakefile +16 -0
  6. data/bin/traject +153 -0
  7. data/doc/macros.md +103 -0
  8. data/doc/settings.md +34 -0
  9. data/lib/traject.rb +10 -0
  10. data/lib/traject/indexer.rb +196 -0
  11. data/lib/traject/json_writer.rb +51 -0
  12. data/lib/traject/macros/basic.rb +9 -0
  13. data/lib/traject/macros/marc21.rb +145 -0
  14. data/lib/traject/marc_extractor.rb +206 -0
  15. data/lib/traject/marc_reader.rb +61 -0
  16. data/lib/traject/qualified_const_get.rb +30 -0
  17. data/lib/traject/solrj_writer.rb +120 -0
  18. data/lib/traject/translation_map.rb +184 -0
  19. data/lib/traject/version.rb +3 -0
  20. data/test/indexer/macros_marc21_test.rb +146 -0
  21. data/test/indexer/macros_test.rb +40 -0
  22. data/test/indexer/map_record_test.rb +120 -0
  23. data/test/indexer/read_write_test.rb +47 -0
  24. data/test/indexer/settings_test.rb +65 -0
  25. data/test/marc_extractor_test.rb +168 -0
  26. data/test/marc_reader_test.rb +29 -0
  27. data/test/solrj_writer_test.rb +106 -0
  28. data/test/test_helper.rb +28 -0
  29. data/test/test_support/hebrew880s.marc +1 -0
  30. data/test/test_support/manufacturing_consent.marc +1 -0
  31. data/test/test_support/test_data.utf8.marc.xml +2609 -0
  32. data/test/test_support/test_data.utf8.mrc +1 -0
  33. data/test/translation_map_test.rb +98 -0
  34. data/test/translation_maps/bad_ruby.rb +8 -0
  35. data/test/translation_maps/bad_yaml.yaml +1 -0
  36. data/test/translation_maps/both_map.rb +1 -0
  37. data/test/translation_maps/both_map.yaml +1 -0
  38. data/test/translation_maps/default_literal.rb +10 -0
  39. data/test/translation_maps/default_passthrough.rb +10 -0
  40. data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
  41. data/test/translation_maps/ruby_map.rb +10 -0
  42. data/test/translation_maps/translate_array_test.yaml +8 -0
  43. data/test/translation_maps/yaml_map.yaml +7 -0
  44. data/traject.gemspec +30 -0
  45. data/vendor/solrj/README +8 -0
  46. data/vendor/solrj/build.xml +39 -0
  47. data/vendor/solrj/ivy.xml +16 -0
  48. data/vendor/solrj/lib/commons-codec-1.7.jar +0 -0
  49. data/vendor/solrj/lib/commons-io-2.1.jar +0 -0
  50. data/vendor/solrj/lib/httpclient-4.2.3.jar +0 -0
  51. data/vendor/solrj/lib/httpcore-4.2.2.jar +0 -0
  52. data/vendor/solrj/lib/httpmime-4.2.3.jar +0 -0
  53. data/vendor/solrj/lib/jcl-over-slf4j-1.6.6.jar +0 -0
  54. data/vendor/solrj/lib/jul-to-slf4j-1.6.6.jar +0 -0
  55. data/vendor/solrj/lib/log4j-1.2.16.jar +0 -0
  56. data/vendor/solrj/lib/noggit-0.5.jar +0 -0
  57. data/vendor/solrj/lib/slf4j-api-1.6.6.jar +0 -0
  58. data/vendor/solrj/lib/slf4j-log4j12-1.6.6.jar +0 -0
  59. data/vendor/solrj/lib/solr-solrj-4.3.1-javadoc.jar +0 -0
  60. data/vendor/solrj/lib/solr-solrj-4.3.1-sources.jar +0 -0
  61. data/vendor/solrj/lib/solr-solrj-4.3.1.jar +0 -0
  62. data/vendor/solrj/lib/wstx-asl-3.2.7.jar +0 -0
  63. data/vendor/solrj/lib/zookeeper-3.4.5.jar +0 -0
  64. metadata +264 -0
@@ -0,0 +1,206 @@
1
+
2
+
3
+ module Traject
4
+ # MarcExtractor is a class for extracting lists of strings from a MARC::Record,
5
+ # according to specifications. See #parse_string_spec for description of string
6
+ # arguments used to specify extraction. See #initialize for options
7
+ # that can be set controlling extraction.
8
+ #
9
+ # Examples:
10
+ #
11
+ # array_of_stuff = MarcExtractor.new(marc_record, "001:245abc:700a").extract
12
+ # values = MarcExtractor.new(marc_record, "040a", :seperator => nil).extract
13
+ #
14
+ class MarcExtractor
15
+ attr_accessor :options, :marc_record, :spec_hash
16
+
17
+
18
+ # Convenience method to construct a MarcExtractor object and
19
+ # run extract on it.
20
+ #
21
+ # First arg is a marc record.
22
+ #
23
+ # Second arg is either a string that will be given to parse_string_spec,
24
+ # OR a hash that's the return value of parse_string_spec.
25
+ #
26
+ # Third arg is an optional options hash that will be passed as
27
+ # third arg of MarcExtractor constructor.
28
# Builds a MarcExtractor for the given record and specification and
# returns the result of calling #extract on it.
#
# marc_record   - the record to extract from; must not be nil.
# specification - either a String (run through parse_string_spec) or a
#                 Hash already in the parsed form.
# options       - passed through as the third constructor argument.
#
# Raises ArgumentError when marc_record is nil.
def self.extract_by_spec(marc_record, specification, options = {})
  # ArgumentError is a real Ruby class; the previously named constant was
  # undefined, so this guard raised NameError instead.
  raise ArgumentError, "first argument must not be nil" if marc_record.nil?

  # Strings are parsed here so the constructor always receives a Hash.
  specification = self.parse_string_spec(specification) unless specification.kind_of?(Hash)

  Traject::MarcExtractor.new(marc_record, specification, options).extract
end
37
+
38
+ # Take a hash that's the output of #parse_string_spec, return
39
+ # an array of strings extracted from a marc record accordingly
40
+ #
41
+ # options:
42
+ #
43
+ # [:seperator] default ' ' (space), what to use to separate
44
+ # subfield values when joining strings
45
+ #
46
+ # [:alternate_script] default :include, include linked 880s for tags
47
+ # that match spec. Also:
48
+ # * false => do not include.
49
+ # * :only => only include linked 880s, not original
50
# Stores the record, the parsed specification and the merged options.
#
# marc_record - the record that #extract will iterate over.
# spec_hash   - a Hash in the form returned by parse_string_spec.
# options     - overrides for the defaults below:
#               :seperator        => ' '       (string used to join subfield values)
#               :alternate_script => :include
#
# Raises ArgumentError when spec_hash is not a Hash.
def initialize(marc_record, spec_hash, options = {})
  self.options = {
    :seperator        => ' ',
    :alternate_script => :include
  }.merge(options)

  # Previously this called an undefined method, producing NoMethodError
  # rather than a meaningful argument error.
  unless spec_hash.kind_of?(Hash)
    raise ArgumentError, "second arg to MarcExtractor.new must be a Hash specification object"
  end

  self.marc_record = marc_record
  self.spec_hash   = spec_hash
end
61
+
62
+ # Converts from a string marc spec like "245abc:700a" to a nested hash used internally
63
+ # to represent the specification.
64
+ #
65
+ # a String specification is a string of form:
66
+ # {tag}{|indicators|}{subfields} separated by colons
67
+ # tag is three chars (usually but not necessarily numeric),
68
+ # indicators are optional two chars enclosed in pipes ("|"),
69
+ # subfields are optional list of chars (alphanumeric)
70
+ #
71
+ # indicator spec must be two chars, but one can be * meaning "don't care".
72
+ # space to mean 'blank'
73
+ #
74
+ # "245|01|abc65:345abc:700|*5|:800"
75
+ #
76
+ # Or, for control (fixed) fields (ordinarily fields 001-010), you can include a byte slice specification,
77
+ # but can NOT include subfield or indicator specifications. Plus can use special tag "LDR" for
78
+ # the marc leader. (TODO)
79
+ #
80
+ # "008[35-37]:LDR[5]"
81
+ # => bytes 35-37 inclusive of field 008, and byte 5 of the marc leader.
82
+ #
83
+ # Returns a nested hash keyed by tags.
84
+ # { tag => {
85
+ # :subfields => ['a', 'b', '2'] # actually, a SET. may be empty or nil
86
+ # :indicators => ['1', '0'] # An array. may be empty or nil; duple, either one can be nil
87
+ # }
88
+ #}
89
+ # For byte offsets, :bytes => 12 or :bytes => (7..10)
90
+ #
91
+ # * subfields and indicators can only be provided for marc data/variable fields
92
+ # * byte slice can only be provided for marc control fields (generally tags less than 010)
93
+ #
94
+ # See tests for more examples.
95
# Parses a colon-separated spec string into a Hash keyed by tag.
#
# Each part is either a variable-field spec (tag, optional |xx|
# indicators, optional subfield codes) or a control-field spec
# (tag followed by [n] or [n-m]).
#
# Returns a Hash of tag => { :subfields, :indicators, :bytes } entries;
# only the keys actually specified are set.
# Raises ArgumentError for a part matching neither form.
def self.parse_string_spec(spec_string)
  spec_string.split(":").each_with_object({}) do |part, parsed|
    if (variable = /\A([a-zA-Z0-9]{3})(\|([a-z0-9\ \*]{2})\|)?([a-z0-9]*)?\Z/.match(part))
      # variable field
      entry = (parsed[variable[1]] ||= {})

      codes = variable[4].to_s.chars
      # Only create the :subfields array when at least one code was given.
      (entry[:subfields] ||= []).concat(codes) unless codes.empty?

      if variable[3]
        # "*" means "don't care" and is stored as nil.
        entry[:indicators] = variable[3].chars.map { |ind| ind == "*" ? nil : ind }
      end
    elsif (fixed = /\A([a-zA-Z0-9]{3})(\[(\d+)(-(\d+))?\])\Z/.match(part)) # "005[4-5]"
      entry = (parsed[fixed[1]] ||= {})
      first_byte = fixed[3]
      last_byte  = fixed[5]

      # A single offset is stored as an Integer, a span as an inclusive Range.
      entry[:bytes] = last_byte ? (first_byte.to_i..last_byte.to_i) : first_byte.to_i
    else
      raise ArgumentError.new("Unrecognized marc extract specification: #{part}")
    end
  end
end
130
+
131
+
132
+ # Returns array of strings, extracted values
133
# Collects extracted values from every field yielded by
# each_matching_line and returns them as a flat Array.
#
# Control fields contribute their whole value, or a byteslice of it when
# the spec has :bytes. Other fields contribute whatever
# collect_subfields returns.
def extract
  extracted = []

  each_matching_line do |field, spec|
    unless control_field?(field)
      extracted.concat(collect_subfields(field, spec))
      next
    end

    byte_spec = spec[:bytes]
    extracted << (byte_spec ? field.value.byteslice(byte_spec) : field.value)
  end

  extracted
end
146
+
147
+ # Yields a block for every line in source record that matches
148
+ # spec. First arg to block is MARC::Field (control or data), second
149
+ # is the hash specification that it matched on. May take account
150
+ # of options such as :alternate_script
151
# Iterates over marc_record and yields (field, spec) for each field that
# has a covering spec and passes that spec's indicator check.
def each_matching_line
  marc_record.each do |field|
    matched_spec = spec_covering_field(field)
    next unless matched_spec
    next unless matches_indicators(field, matched_spec)

    yield(field, matched_spec)
  end
end
158
+
159
+ # Pass in a marc data field and a hash spec, returns
160
+ # an ARRAY of one or more strings, subfields extracted
161
+ # and processed per spec. Takes account of options such
162
+ # as :seperator
163
# Gathers the values of the subfields of +field+ selected by +spec+.
#
# When spec[:subfields] is nil every subfield is taken; otherwise only
# those whose code is included. nil values are dropped.
#
# Returns a one-element Array holding the values joined with
# options[:seperator] when that option is set, otherwise the Array of
# individual values.
def collect_subfields(field, spec)
  wanted_codes = spec[:subfields]

  values = field.subfields.each_with_object([]) do |subfield, kept|
    next unless wanted_codes.nil? || wanted_codes.include?(subfield.code)

    value = subfield.value
    kept << value unless value.nil?
  end

  joiner = options[:seperator]
  joiner ? [values.join(joiner)] : values
end
170
+
171
+ # Is there a spec covering extraction from this field?
172
+ # May return true on 880's matching other tags depending
173
+ # on value of :alternate_script
174
+ # if :alternate_script is :only, will return original spec when field is an 880.
175
+ # otherwise will always return nil for 880s, you have to handle :alternate_script :include
176
+ # elsewhere, to add in the 880 in the right order
177
# Returns the spec Hash that applies to +field+, or nil when none does.
#
# * For an 880 field, unless options[:alternate_script] is false, the
#   spec is looked up under the first three bytes of subfield 6 (the
#   tag of the field this 880 is linked to). An 880 without a
#   subfield 6 has no linked tag and yields nil.
# * For any other case, unless options[:alternate_script] is :only, the
#   spec is looked up under the field's own tag.
def spec_covering_field(field)
  if field.tag == "880" && options[:alternate_script] != false
    linkage = field["6"]
    # Check for the subfield before calling methods on it; previously a
    # missing subfield 6 raised NoMethodError here.
    return nil unless linkage

    # Due to bug in jruby https://github.com/jruby/jruby/issues/886 , we need
    # to do this weird encode gymnastics, which fixes it for mysterious reasons.
    original_tag = linkage.encode(linkage.encoding).byteslice(0, 3)
    spec_hash[original_tag]
  elsif options[:alternate_script] != :only
    spec_hash[field.tag]
  end
end
191
+
192
# True when +field+ is a MARC::ControlField (or a subclass of it).
def control_field?(field)
  # A class check is used because no predicate for this is called on
  # the field objects themselves.
  field.is_a?(MARC::ControlField)
end
197
+
198
+ # a marc field, and an individual spec hash, {:subfields => array, :indicators => array}
199
# Checks +field+'s indicators against spec[:indicators].
#
# A nil :indicators entry, or a nil element within it, matches
# anything. Otherwise the element must equal the field's
# indicator1 / indicator2 respectively.
def matches_indicators(field, spec)
  wanted = spec[:indicators]
  return true if wanted.nil?

  first_wanted, second_wanted = wanted[0], wanted[1]
  return false unless first_wanted.nil? || first_wanted == field.indicator1

  second_wanted.nil? || second_wanted == field.indicator2
end
205
+ end
206
+ end
@@ -0,0 +1,61 @@
1
+ require 'marc'
2
+
3
+ # A Reader class that can be used with Traject::Indexer.reader, to read
4
+ # MARC records.
5
+ #
6
+ # Includes Enumerable for convenience.
7
+ #
8
+ # Reads in Marc records using ruby marc. Depends on config variables to
9
+ # determine what serialization type to expect, and other parameters controlling
10
+ # de-serialization.
11
+ #
12
+ # Settings:
13
+ # ["marc_source.type"] serialization type. default 'binary'
14
+ # * "binary". Actual marc.
15
+ # * "xml", MarcXML
16
+ # * "json". (NOT YET IMPLEMENTED) The "marc-in-json" format, encoded as newline-separated
17
+ # json. A simplistic newline-separated json, with no comments
18
+ # allowed, and no unescaped internal newlines allowed in the json
19
+ # objects -- we just read line by line, and assume each line is a
20
+ # marc-in-json. http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
21
+ # ["marc_source.xml_parser"] For XML type, which XML parser to tell Marc::Reader
22
+ # to use. Anything recognized by Marc::Reader :parser
23
+ # argument. By default, asks Marc::Reader to take
24
+ # its best guess as to highest performance available
25
+ # installed option.
26
+ #
27
+ #
28
+ # Can NOT yet read Marc8, input is always assumed UTF8.
29
# Enumerable wrapper that reads MARC records from an input stream using
# a ruby-marc reader chosen from the settings.
class Traject::MarcReader
  include Enumerable

  attr_reader :settings, :input_stream

  # Evaluated once when the class body is loaded.
  @@best_xml_parser = MARC::XMLReader.best_available

  # input_stream - handed to the underlying MARC reader.
  # settings     - looked up by the "marc_source.*" keys below.
  def initialize(input_stream, settings)
    @input_stream = input_stream
    @settings     = settings
  end

  # Builds the underlying reader on first use and memoizes it.
  #
  # "marc_source.type" == "xml" selects MARC::XMLReader, using
  # "marc_source.xml_parser" or the class-level best-available parser.
  # Any other type selects MARC::Reader.
  def internal_reader
    return @internal_reader if defined? @internal_reader

    @internal_reader =
      if "xml" == settings["marc_source.type"]
        chosen_parser = settings["marc_source.xml_parser"] || @@best_xml_parser
        MARC::XMLReader.new(input_stream, :parser => chosen_parser)
      else
        MARC::Reader.new(input_stream)
      end
  end

  # Delegates iteration, with any arguments and block, to the internal reader.
  def each(*args, &block)
    internal_reader.each(*args, &block)
  end
end
@@ -0,0 +1,30 @@
1
+ # From http://redcorundum.blogspot.com/2006/05/kernelqualifiedconstget.html
2
+ # Adapted into a module, rather than monkey patching it into Kernel
3
+ #
4
+ # Method to take a string constant name, including :: qualifications, and
5
+ # look up the actual constant. Looks up relative to current file.
6
+ # Respects leading ::. Etc.
7
+ module Traject::QualifiedConstGet
8
+
9
+
10
# Resolves a possibly "::"-qualified constant name to the constant.
#
# A leading "::" resolves from Object only. Otherwise resolution is
# tried relative to the receiver's namespace (the receiver itself if it
# is a Class or Module, else its class), stripping one trailing
# namespace segment after each NameError, and finally from Object.
#
# Raises NameError when the final lookup from Object fails.
def qualified_const_get(str)
  segments = str.to_s.split('::')
  resolve  = lambda { |names| names.inject(Object) { |ns, name| ns.const_get(name) } }

  # An empty first segment means the name started with "::".
  return resolve.call(segments[1..-1]) if segments[0].empty?

  start_ns = ((Class === self) || (Module === self)) ? self : self.class
  enclosing = start_ns.to_s.split('::')

  until enclosing.empty?
    begin
      return resolve.call(enclosing + segments)
    rescue NameError
      # Not found at this nesting level; retry one namespace further out.
      enclosing.pop
    end
  end

  resolve.call(segments)
end
29
+
30
+ end
@@ -0,0 +1,120 @@
1
+ require 'traject'
2
+ require 'traject/qualified_const_get'
3
+
4
+ #
5
+ # Writes to a Solr using SolrJ, and the SolrJ HttpSolrServer.
6
+ # (sub-class later for the ConcurrentUpdate server?)
7
+ #
8
+ # settings:
9
+ # [solr.url] Your solr url (required)
10
+ # [solrj_writer.server_class_name] Defaults to "HttpSolrServer". You can specify
11
+ # another Solr Server sub-class, but it has
12
+ # to take a one-arg url constructor. Maybe
13
+ # subclass this writer class and overwrite
14
+ # instantiate_solr_server! otherwise
15
+ # [solrj.jar_dir] Custom directory containing all of the SolrJ jars. All
16
+ # jars in this dir will be loaded. Otherwise,
17
+ # we load our own packaged solrj jars. This setting
18
+ # can't really be used differently in the same app instance,
19
+ # since jars are loaded globally.
20
+ # [solrj_writer.parser_class_name] A String name of a class in package
21
+ # org.apache.solr.client.solrj.impl,
22
+ # we'll instantiate one with a zero-arg
23
+ # constructor, and pass it as an arg to setParser on
24
+ # the SolrServer instance, if present.
25
+ # NOTE: For contacting a Solr 1.x server, with the
26
+ # recent version of SolrJ used by default, set to
27
+ # "XMLResponseParser"
28
+ # [solrj_writer.commit_on_close] If true (or string 'true'), send a commit to solr
29
+ # at end of #process.
30
+ class Traject::SolrJWriter
31
+ include Traject::QualifiedConstGet
32
+
33
+ attr_reader :settings
34
+
35
# Stores the settings, validates them, makes sure the SolrJ classes are
# loaded, and instantiates the solr server.
#
# argSettings - settings object read with string keys such as "solr.url".
def initialize(argSettings)
  @settings = argSettings

  # Validate first so that bad settings raise before any jars are loaded.
  settings_check!(settings)
  ensure_solrj_loaded!

  # Touch the memoized accessor so the server is created now.
  solr_server
end
43
+
44
+ # Loads solrj if not already loaded. By loading all jars found
45
+ # in settings["solrj.jar_dir"]
46
# Imports the SolrJ classes used by this writer unless HttpSolrServer and
# SolrInputDocument are already defined.
#
# If the import raises NameError, every *.jar in settings["solrj.jar_dir"]
# (or the vendored solrj lib directory when that is unset) is required and
# the import is retried once.
#
# Raises LoadError when the import still fails on the second attempt.
def ensure_solrj_loaded!
  return if defined?(HttpSolrServer) && defined?(SolrInputDocument)

  require 'java'

  attempts = 0
  begin
    attempts += 1
    java_import org.apache.solr.client.solrj.impl.HttpSolrServer
    java_import org.apache.solr.common.SolrInputDocument
  rescue NameError
    bundled_jar_dir = File.expand_path("../../vendor/solrj/lib", File.dirname(__FILE__))
    jar_directory   = settings["solrj.jar_dir"] || bundled_jar_dir

    Dir.glob("#{jar_directory}/*.jar").each { |jar| require jar }

    raise LoadError.new("Can not find SolrJ java classes") if attempts > 1

    retry
  end
end
72
+
73
# Builds one SolrInputDocument from +hash+ and adds it to the solr server.
#
# hash - maps a field name to a collection of values; each value is added
#        as a separate field entry under that name.
#
# Returns whatever solr_server.add returns.
def put(hash)
  document = SolrInputDocument.new

  hash.each_pair do |field_name, values|
    values.each { |single_value| document.addField(field_name, single_value) }
  end

  # TODO: Buffer docs internally, add in arrays, one http
  # transaction per array. Is what solrj wiki recommends.
  solr_server.add(document)
end
86
+
87
# Optionally commits, then shuts the solr server down and forgets it.
#
# A commit is sent only when settings["solrj_writer.commit_on_close"]
# stringifies to "true". The shutdown runs in an ensure clause so the
# server is still released when the commit raises; the commit's
# exception then propagates to the caller.
#
# Returns nil.
def close
  solr_server.commit if settings["solrj_writer.commit_on_close"].to_s == "true"
  nil
ensure
  solr_server.shutdown
  @solr_server = nil
end
93
+
94
+
95
# Returns the memoized solr server, instantiating it on first use (or
# again after it has been reset to nil).
def solr_server
  return @solr_server if @solr_server

  @solr_server = instantiate_solr_server!
end
98
+ attr_writer :solr_server # mainly for testing
99
+
100
+ # Instantiates a solr server of class settings["solrj_writer.server_class_name"] or "HttpSolrServer"
101
+ # and initializes it with settings["solr.url"]
102
# Creates a solr server object.
#
# The class is resolved with qualified_const_get from
# settings["solrj_writer.server_class_name"], defaulting to
# "HttpSolrServer", and constructed with settings["solr.url"] as a String.
# When settings["solrj_writer.parser_class_name"] is set, a parser of that
# name from org.apache.solr.client.solrj.impl is instantiated with no
# arguments and passed to setParser.
#
# Returns the new server.
def instantiate_solr_server!
  class_name = settings["solrj_writer.server_class_name"] || "HttpSolrServer"
  solr       = qualified_const_get(class_name).new(settings["solr.url"].to_s)

  parser_class_name = settings["solrj_writer.parser_class_name"]
  if parser_class_name
    response_parser = org.apache.solr.client.solrj.impl.const_get(parser_class_name).new
    solr.setParser(response_parser)
  end

  solr
end
113
+
114
# Verifies that +settings+ has a non-nil "solr.url" entry.
#
# Returns nil when the entry is present.
# Raises ArgumentError when the key is missing or its value is nil.
def settings_check!(settings)
  return if settings.has_key?("solr.url") && !settings["solr.url"].nil?

  raise ArgumentError.new("SolrJWriter requires a 'solr.url' solr url in settings")
end
119
+
120
+ end
@@ -0,0 +1,184 @@
1
+ require 'traject'
2
+
3
+ require 'yaml'
4
+
5
+
6
+ module Traject
7
+ # A TranslationMap is basically just something that has a hash-like #[]
8
+ # method to map from input strings to output strings:
9
+ #
10
+ # translation_map["some_input"] #=> some_output
11
+ #
12
+ # Input is assumed to always be string, output is either string
13
+ # or array of strings.
14
+ #
15
+ # What makes it more useful than a stunted hash is its ability to load
16
+ # the hash definitions from configuration files, either pure ruby or
17
+ # yaml.
18
+ #
19
+ # TranslationMap.new("dir/some_file")
20
+ #
21
+ # Will look through the entire ruby $LOAD_PATH, for a translation_maps subdir
22
+ # that contains either some_file.rb OR some_file.yaml
23
+ # * Looks for "/translation_maps" subdir in load paths, so
24
+ # for instance you can have a gem that keeps translation maps
25
+ # in ./lib/translation_maps, and it Just Works.
26
+ # * Note you do NOT supply the ".rb" or ".yaml" suffix yourself,
27
+ # it'll use whichever it finds (allows calling code to not care which is used).
28
+ #
29
+ # Ruby files just need to have their last line eval to a hash. The file
30
+ # will be run through `eval`, don't do it with untrusted content (naturally)
31
+ #
32
+ # You can also pass in a Hash for consistency to TranslationMap.new, although
33
+ # I don't know why you'd want to.
34
+ #
35
+ # == Special default handling
36
+ #
37
+ # The key "__default__" in the hash is treated specially. If set to a string,
38
+ # that string will be returned by the TranslationMap for any input not otherwise
39
+ # included. If set to the special string "__passthrough__", then for input not
40
+ # mapped, the original input string will be returned.
41
+ #
42
+ # This is most useful for YAML definition files, if you are using an actual ruby
43
+ # hash, you could just set the hash to do what you want using Hash#default_proc
44
+ # etc.
45
+ #
46
+ # Or, when calling TranslationMap.new(), you can pass in options over-riding special
47
+ # key too:
48
+ #
49
+ # TranslationMap.new("something", :default => "foo")
50
+ # TranslationMap.new("something", :default => :passthrough)
51
+ #
52
+ # == Output: String or array of strings
53
+ #
54
+ # The output can be a string or an array of strings, or nil. It should not be anything else.
55
+ # When used with the #translate_array! method, one string can be replaced by multiple values
56
+ # (array of strings) or removed (nil)
57
+ #
58
+ # == Caching
59
+ # Lookup and loading of configuration files will be cached, for efficiency.
60
+ # You can reset with `TranslationMap.reset_cache!`
61
+ #
62
+ # == YAML example:
63
+ #
64
+ # key: value
65
+ # key2: value2 multiple words fine
66
+ # key2b: "Although you can use quotes if you want: Or need."
67
+ # key3:
68
+ # - array
69
+ # - of
70
+ # - values look like this
71
+ class TranslationMap
72
# Memoizing loader for translation map definition files found under a
# "translation_maps" directory of any $LOAD_PATH entry.
class Cache
  def initialize
    @cached = {}
  end

  # Returns the definition for +path+ (normally a Hash), or nil if no
  # file was found. Results, including nil, are remembered until
  # reset_cache! is called.
  def lookup(path)
    @cached[path] = _lookup!(path) unless @cached.has_key?(path)
    @cached[path]
  end

  # Forces a lookup without consulting the cache.
  #
  # Walks $LOAD_PATH in order and loads the first
  # translation_maps/<path>.rb or translation_maps/<path>.yaml found;
  # within one directory the .rb file takes precedence.
  #
  # Returns the loaded value, or nil when no file exists.
  # May raise on a syntax error in the file being loaded.
  def _lookup!(path)
    $LOAD_PATH.each do |base|
      rb_file   = File.join(base, "translation_maps", "#{path}.rb")
      yaml_file = File.join(base, "translation_maps", "#{path}.yaml")

      # File.exist? is used because File.exists? was removed in Ruby 3.2.
      if File.exist?(rb_file)
        # NOTE: the file's contents are eval'd, so it must be trusted.
        # File.read closes the handle, unlike File.open(...).read.
        return eval(File.read(rb_file), binding, rb_file)
      elsif File.exist?(yaml_file)
        # Stop at the first match, as for .rb files, so that a later
        # load-path entry cannot override an earlier one.
        return YAML.load_file(yaml_file)
      end
    end

    nil
  end

  # Forgets every cached lookup.
  def reset_cache!
    @cached.clear
  end
end
112
+
113
+ attr_reader :hash
114
+ attr_reader :default
115
+
116
+ class << self
117
+ attr_accessor :cache
118
+ def reset_cache!
119
+ cache.reset_cache!
120
+ end
121
+ end
122
+ self.cache = Cache.new
123
+
124
+
125
# Builds a translation map from a Hash or from a named definition file.
#
# defn    - a Hash used directly, or a name looked up through the
#           class-level cache.
# options - :default overrides any "__default__" entry in the definition.
#
# When no :default option is given and the definition has a
# "__default__" key, that entry becomes the default and is left out of
# this instance's hash.
#
# Raises NotFound when a named definition cannot be located.
def initialize(defn, options = {})
  if defn.kind_of? Hash
    @hash = defn
  else
    @hash = self.class.cache.lookup(defn)
    raise NotFound.new(defn) if @hash.nil?
  end

  if options[:default]
    @default = options[:default]
  elsif @hash.has_key? "__default__"
    # Remove the special key from a private copy. The source hash may be
    # the cached definition shared by every map built from the same file
    # (or the caller's own Hash); deleting from it directly made the
    # default vanish for every later instance.
    @hash = @hash.dup
    @default = @hash.delete("__default__")
  end
end
139
+
140
# Looks up +key+ in the map.
#
# When the key is absent and a default is set:
# * a default of "__passthrough__" (the form used in definition files)
#   or :passthrough (the form documented for the :default option)
#   returns the key itself;
# * any other default is returned as the value.
#
# Otherwise returns the hash's own result for the key.
def [](key)
  fallback = default

  if fallback && !@hash.has_key?(key)
    # Accept both spellings; previously only the string was recognised,
    # so :default => :passthrough returned the symbol :passthrough.
    return key if fallback == "__passthrough__" || fallback == :passthrough

    return fallback
  end

  @hash[key]
end
151
+ alias_method :map, :[]
152
+
153
+ # Run every element of an array through this translation map,
154
+ # return the resulting array. If translation map returns nil,
155
+ # original element will be missing from output.
156
+ #
157
+ # If an input maps to an array, each element of the array will be flattened
158
+ # into the output.
159
+ #
160
+ # If an input maps to nil, it will cause the input element to be removed
161
+ # entirely.
162
# Translates every element of +array+ through #map and returns a new
# Array of the results.
#
# An element that maps to an Array contributes each of that Array's
# elements; an element that maps to nil contributes nothing.
def translate_array(array)
  array.flat_map do |input_element|
    translated = map(input_element)

    if translated.kind_of?(Array)
      translated
    elsif translated.nil?
      []
    else
      [translated]
    end
  end
end
172
+
173
# In-place variant of #translate_array: replaces the contents of +array+
# with its translation and returns the same Array object.
def translate_array!(array)
  translated = translate_array(array)
  array.replace(translated)
end
176
+
177
# Raised when no definition file can be found for a translation map.
#
# Derives from StandardError so that a plain `rescue` catches it;
# code rescuing NotFound or Exception explicitly is unaffected.
class NotFound < StandardError
  # path - the definition name that was searched for.
  def initialize(path)
    super("No translation map definition file found at '#{path}[.rb|.yaml]' in load path")
  end
end
182
+
183
+ end
184
+ end