traject 2.0.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +27 -0
- data/.yardopts +3 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +20 -0
- data/README.md +461 -0
- data/Rakefile +21 -0
- data/bench/bench.rb +30 -0
- data/bin/traject +16 -0
- data/doc/batch_execution.md +243 -0
- data/doc/extending.md +190 -0
- data/doc/indexing_rules.md +265 -0
- data/doc/other_commands.md +47 -0
- data/doc/settings.md +101 -0
- data/lib/tasks/load_maps.rake +48 -0
- data/lib/traject.rb +11 -0
- data/lib/traject/command_line.rb +301 -0
- data/lib/traject/csv_writer.rb +34 -0
- data/lib/traject/debug_writer.rb +47 -0
- data/lib/traject/delimited_writer.rb +110 -0
- data/lib/traject/indexer.rb +613 -0
- data/lib/traject/indexer/settings.rb +110 -0
- data/lib/traject/json_writer.rb +51 -0
- data/lib/traject/line_writer.rb +63 -0
- data/lib/traject/macros/basic.rb +9 -0
- data/lib/traject/macros/marc21.rb +223 -0
- data/lib/traject/macros/marc21_semantics.rb +584 -0
- data/lib/traject/macros/marc_format_classifier.rb +197 -0
- data/lib/traject/marc_extractor.rb +410 -0
- data/lib/traject/marc_reader.rb +89 -0
- data/lib/traject/mock_reader.rb +97 -0
- data/lib/traject/ndj_reader.rb +40 -0
- data/lib/traject/null_writer.rb +22 -0
- data/lib/traject/qualified_const_get.rb +40 -0
- data/lib/traject/solr_json_writer.rb +277 -0
- data/lib/traject/thread_pool.rb +161 -0
- data/lib/traject/translation_map.rb +267 -0
- data/lib/traject/util.rb +52 -0
- data/lib/traject/version.rb +3 -0
- data/lib/traject/yaml_writer.rb +9 -0
- data/lib/translation_maps/lcc_top_level.yaml +26 -0
- data/lib/translation_maps/marc_genre_007.yaml +9 -0
- data/lib/translation_maps/marc_genre_leader.yaml +22 -0
- data/lib/translation_maps/marc_geographic.yaml +589 -0
- data/lib/translation_maps/marc_instruments.yaml +102 -0
- data/lib/translation_maps/marc_languages.yaml +490 -0
- data/test/debug_writer_test.rb +38 -0
- data/test/delimited_writer_test.rb +104 -0
- data/test/indexer/each_record_test.rb +59 -0
- data/test/indexer/macros_marc21_semantics_test.rb +391 -0
- data/test/indexer/macros_marc21_test.rb +190 -0
- data/test/indexer/macros_test.rb +40 -0
- data/test/indexer/map_record_test.rb +209 -0
- data/test/indexer/read_write_test.rb +101 -0
- data/test/indexer/settings_test.rb +152 -0
- data/test/indexer/to_field_test.rb +77 -0
- data/test/marc_extractor_test.rb +412 -0
- data/test/marc_format_classifier_test.rb +98 -0
- data/test/marc_reader_test.rb +110 -0
- data/test/solr_json_writer_test.rb +248 -0
- data/test/test_helper.rb +90 -0
- data/test/test_support/245_no_ab.marc +1 -0
- data/test/test_support/880_with_no_6.utf8.marc +1 -0
- data/test/test_support/bad_subfield_code.marc +1 -0
- data/test/test_support/bad_utf_byte.utf8.marc +1 -0
- data/test/test_support/date_resort_to_260.marc +1 -0
- data/test/test_support/date_type_r_missing_date2.marc +1 -0
- data/test/test_support/date_with_u.marc +1 -0
- data/test/test_support/demo_config.rb +155 -0
- data/test/test_support/emptyish_record.marc +1 -0
- data/test/test_support/escaped_character_reference.marc8.marc +1 -0
- data/test/test_support/george_eliot.marc +1 -0
- data/test/test_support/hebrew880s.marc +1 -0
- data/test/test_support/louis_armstrong.marc +1 -0
- data/test/test_support/manufacturing_consent.marc +1 -0
- data/test/test_support/manuscript_online_thesis.marc +1 -0
- data/test/test_support/microform_online_conference.marc +1 -0
- data/test/test_support/multi_era.marc +1 -0
- data/test/test_support/multi_geo.marc +1 -0
- data/test/test_support/musical_cage.marc +1 -0
- data/test/test_support/nature.marc +1 -0
- data/test/test_support/one-marc8.mrc +1 -0
- data/test/test_support/online_only.marc +1 -0
- data/test/test_support/packed_041a_lang.marc +1 -0
- data/test/test_support/test_data.utf8.json +30 -0
- data/test/test_support/test_data.utf8.marc.xml +2609 -0
- data/test/test_support/test_data.utf8.mrc +1 -0
- data/test/test_support/test_data.utf8.mrc.gz +0 -0
- data/test/test_support/the_business_ren.marc +1 -0
- data/test/translation_map_test.rb +225 -0
- data/test/translation_maps/bad_ruby.rb +8 -0
- data/test/translation_maps/bad_yaml.yaml +1 -0
- data/test/translation_maps/both_map.rb +1 -0
- data/test/translation_maps/both_map.yaml +1 -0
- data/test/translation_maps/default_literal.rb +10 -0
- data/test/translation_maps/default_passthrough.rb +10 -0
- data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
- data/test/translation_maps/properties_map.properties +5 -0
- data/test/translation_maps/ruby_map.rb +10 -0
- data/test/translation_maps/translate_array_test.yaml +8 -0
- data/test/translation_maps/yaml_map.yaml +7 -0
- data/traject.gemspec +47 -0
- metadata +382 -0
@@ -0,0 +1,161 @@
|
|
1
|
+
require 'concurrent'
|
2
|
+
require 'thread' # for Queue
|
3
|
+
|
4
|
+
module Traject
|
5
|
+
# An abstraction wrapping a Concurrent::ThreadPool in some configuration choices
|
6
|
+
# and other apparatus. Concurrent::ThreadPool is a Java ThreadPool executor on
|
7
|
+
# jruby for performance, and is ruby-concurrent's own ruby implementation otherwise.
|
8
|
+
#
|
9
|
+
# 1) Initialize with chosen pool size -- we create fixed size pools, where
|
10
|
+
# core and max sizes are the same.
|
11
|
+
#
|
12
|
+
# 2) If initialized with nil or 0 for threadcount, no thread pool will actually
|
13
|
+
# be created, and work sent to the Traject::ThreadPool will just be executed
|
14
|
+
# in the caller thread. We call this a nil threadpool. One situation it can be useful
|
15
|
+
# is if you are running under MRI, where multi-core parallelism isn't available, so
|
16
|
+
# an actual threadpool may not be useful. (Although in some cases a thread pool,
|
17
|
+
# especially one with size 1, can be useful in MRI for I/O blocking operations)
|
18
|
+
#
|
19
|
+
# 3) Use the #maybe_in_thread_pool method to send blocks to thread pool for
|
20
|
+
# execution -- if configured with a nil threadcount, your block will just be
|
21
|
+
# executed in calling thread. Be careful to not refer to any non-local
|
22
|
+
# variables in the block, unless the variable has an object you can
|
23
|
+
# use thread-safely!
|
24
|
+
#
|
25
|
+
# 4) We configure our underlying Concurrent::ThreadPool
|
26
|
+
# with a work queue that will buffer up to (pool_size*3) tasks. If the queue is full,
|
27
|
+
# the underlying Concurrent::ThreadPool is set up to use the :caller_runs policy
|
28
|
+
# meaning the block will end up executing in caller's own thread. With the kind
|
29
|
+
# of work we're doing, where each unit of work is small and there are many of them--
|
30
|
+
# the :caller_runs policy serves as an effective 'back pressure' mechanism to keep
|
31
|
+
# the work queue from getting too large and exhausting memory, when producers are
|
32
|
+
# faster than consumers.
|
33
|
+
#
|
34
|
+
# 5) Any exceptions raised by pool-executed work are captured and accumulated in a thread-safe
|
35
|
+
# manner, and can be re-raised in the thread of your choice by calling
|
36
|
+
# #raise_collected_exception!
|
37
|
+
#
|
38
|
+
# 6) When you are done with the threadpool, you can and must call
|
39
|
+
# #shutdown_and_wait, which will wait for all current queued work
|
40
|
+
# to complete, then return. You can not give any more work to the pool
|
41
|
+
# after you do this. By default it'll wait pretty much forever, which should
|
42
|
+
# be fine. If you never call shutdown, then queued or in-progress work
|
43
|
+
# may be abandoned when the program ends, which would be bad.
|
44
|
+
#
|
45
|
+
# 7) We will keep track of total times a block is run in thread pool, and
|
46
|
+
# total elapsed (wall) time of running all blocks, so an average_execution_ms
|
47
|
+
# time can be given. #average_execution_ms may be inaccurate if called when
|
48
|
+
# threads are still executing, as it's not entirely thread safe (may get
|
49
|
+
# an off by one as to total iterations)
|
50
|
+
class ThreadPool
  # pool_size and queue_capacity are both nil for a null (no-op) pool.
  attr_reader :pool_size, :queue_capacity

  # First arg is pool size; nil or 0 gives a null/no-op pool which executes
  # all work in the caller thread.
  #
  # The size is converted with #to_i *before* it is tested or used, so values
  # arriving as strings (eg "3", or "0") behave the same as the equivalent
  # integers: the queue capacity is always an integer three times the pool size.
  def initialize(pool_size)
    @thread_pool    = nil
    @pool_size      = nil
    @queue_capacity = nil

    # A thread-safe queue to collect exceptions cross-threads.
    # We really only need to save the first exception, but a queue
    # is a convenient way to store a value concurrency-safely, and
    # might as well store all of them. Created even for a null pool so
    # #collect_exception and #raise_collected_exception! are always safe to call.
    @exceptions_caught_queue = Queue.new

    size = pool_size.to_i
    return if size == 0

    @pool_size      = size
    @queue_capacity = size * 3

    @thread_pool = Concurrent::ThreadPoolExecutor.new(
      :min_threads     => @pool_size,
      :max_threads     => @pool_size,
      :max_queue       => @queue_capacity,
      :fallback_policy => :caller_runs
    )
  end

  # Pass it a block, MAYBE gets executed in the bg in a thread pool. Maybe
  # gets executed in the calling thread.
  #
  # There are actually two 'maybes':
  #
  # * If Traject::ThreadPool was configured with null thread pool, then ALL
  #   work will be executed in calling thread.
  #
  # * If there is a thread pool, but its work queue is full, then a job
  #   will be executed in calling thread (because the underlying executor is
  #   configured with a limited sized queue and the :caller_runs fallback policy)
  #
  # You can pass arbitrary arguments to the method, that will then be passed
  # to your block -- similar to how ruby Thread.new works. This is convenient
  # for creating variables unique to the block that won't be shared outside
  # the thread:
  #
  #     thread_pool.maybe_in_thread_pool(x, y) do |x1, y1|
  #       100.times do
  #         something_with(x1)
  #       end
  #     end
  #     x = "something else"
  #     # If we hadn't passed args with block, and had just
  #     # used x in the block, it'd be the SAME x as this one,
  #     # and would be pointing to a different string now!
  #
  # Note, that just makes block-local variables, it doesn't
  # help you with whether a data structure itself is thread safe.
  #
  # With a null pool the block's own return value is returned; with a real
  # pool the return value of the executor's #post is returned.
  def maybe_in_thread_pool(*args)
    return yield(*args) unless @thread_pool

    @thread_pool.post do
      begin
        yield(*args)
      rescue Exception => e
        # Deliberately broad: anything raised by the work is stored so it
        # can be re-raised later in a thread of the caller's choosing.
        collect_exception(e)
      end
    end
  end

  # thread-safe way of storing an exception, to raise
  # later in a different thread. We don't guarantee
  # that we can store more than one at a time, only
  # the first one recorded may be stored.
  def collect_exception(e)
    @exceptions_caught_queue.push(e)
  end

  # If there's a stored collected exception, raise it
  # again now. Call this to re-raise exceptions caught in
  # other threads in the thread of your choice.
  #
  # If nothing has been collected (always the case for a null pool that
  # never ran work in the background) this method is just a no-op
  # returning nil.
  def raise_collected_exception!
    collected =
      begin
        # Non-blocking pop: a separate empty?-then-pop could block forever
        # if another thread took the last exception in between.
        @exceptions_caught_queue.pop(true)
      rescue ThreadError
        nil
      end

    raise collected if collected
  end

  # shutdown threadpool, and wait for all work to complete.
  # this one is also a no-op if you have a null ThreadPool that
  # doesn't really have a threadpool at all.
  #
  # returns elapsed time in seconds it took to shutdown
  def shutdown_and_wait
    start_t = Time.now

    if @thread_pool
      @thread_pool.shutdown
      @thread_pool.wait_for_termination
    end

    return (Time.now - start_t)
  end
end
|
161
|
+
end
|
@@ -0,0 +1,267 @@
|
|
1
|
+
require 'traject'
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'dot-properties'
|
5
|
+
|
6
|
+
|
7
|
+
module Traject
|
8
|
+
# A TranslationMap is basically just something that has a hash-like #[]
|
9
|
+
# method to map from input strings to output strings:
|
10
|
+
#
|
11
|
+
# translation_map["some_input"] #=> some_output
|
12
|
+
#
|
13
|
+
# Input is assumed to always be string, output is either string
|
14
|
+
# or array of strings.
|
15
|
+
#
|
16
|
+
# What makes it more useful than a stunted hash is its ability to load
|
17
|
+
# the hash definitions from configuration files, either pure ruby,
|
18
|
+
# yaml, or java .properties file (not all .properties features may
|
19
|
+
# be supported, we use dot-properties gem for reading)
|
20
|
+
#
|
21
|
+
# traject's `extract_marc` macro allows you to specify a :translation_map=>filename argument
|
22
|
+
# that will automatically find and use a translation map on the resulting data:
|
23
|
+
#
|
24
|
+
# extract_marc("040a", :translation_map => "languages")
|
25
|
+
#
|
26
|
+
# Or you can always create one yourself and use it how you like:
|
27
|
+
#
|
28
|
+
# map = TranslationMap.new("languages")
|
29
|
+
#
|
30
|
+
# In either case, TranslationMap will look for a file named, in that example,
|
31
|
+
# `languages.rb` or `languages.yaml` or `languages.properties`,
|
32
|
+
# somewhere in the ruby $LOAD_PATH in a `/translation_maps` subdir.
|
33
|
+
#
|
34
|
+
# * Also looks for "/translation_maps" subdir in load paths, so
|
35
|
+
# for instance you can have a gem that keeps translation maps
|
36
|
+
# in ./lib/translation_maps, and it Just Works.
|
37
|
+
#
|
38
|
+
# * Note you do NOT supply the .rb, .yaml, or .properties suffix yourself,
|
39
|
+
# it'll use whichever it finds (allows calling code to not care which is used).
|
40
|
+
#
|
41
|
+
# Ruby files just need to have their last line eval to a hash. The file
|
42
|
+
# will be run through `eval`, don't do it with untrusted content (naturally)
|
43
|
+
#
|
44
|
+
# You can also pass in a Hash for consistency to TranslationMap.new, although
|
45
|
+
# I don't know why you'd want to.
|
46
|
+
#
|
47
|
+
# ## Special default handling
|
48
|
+
#
|
49
|
+
# The key "__default__" in the hash is treated specially. If set to a string,
|
50
|
+
# that string will be returned by the TranslationMap for any input not otherwise
|
51
|
+
# included. If set to the special string "__passthrough__", then for input not
|
52
|
+
# mapped, the original input string will be returned.
|
53
|
+
#
|
54
|
+
# This is most useful for YAML definition files, if you are using an actual ruby
|
55
|
+
# hash, you could just set the hash to do what you want using Hash#default_proc
|
56
|
+
# etc.
|
57
|
+
#
|
58
|
+
# Or, when calling TranslationMap.new(), you can pass in options over-riding special
|
59
|
+
# key too:
|
60
|
+
#
|
61
|
+
# TranslationMap.new("something", :default => "foo")
|
62
|
+
# TranslationMap.new("something", :default => :passthrough)
|
63
|
+
#
|
64
|
+
# ## Output: String or array of strings
|
65
|
+
#
|
66
|
+
# The output can be a string or an array of strings, or nil. It should not be anything else.
|
67
|
+
# When used with the #translate_array! method, one string can be replaced by multiple values
|
68
|
+
# (array of strings) or removed (nil)
|
69
|
+
#
|
70
|
+
# There's no way to specify multiple return values in a .properties, use .yaml or .rb for that.
|
71
|
+
#
|
72
|
+
# ## Caching
|
73
|
+
#
|
74
|
+
# Lookup and loading of configuration files will be cached, for efficiency.
|
75
|
+
# You can reset with `TranslationMap.reset_cache!`
|
76
|
+
#
|
77
|
+
# ## YAML example:
|
78
|
+
#
|
79
|
+
# key: value
|
80
|
+
# key2: value2 multiple words fine
|
81
|
+
# key2b: "Although you can use quotes if you want: Or need."
|
82
|
+
# key3:
|
83
|
+
# - array
|
84
|
+
# - of
|
85
|
+
# - values look like this
|
86
|
+
#
|
87
|
+
# ## Alternatives
|
88
|
+
# `Traject::TranslationMap` provides an easy way to deal with the most common translation case:
|
89
|
+
# simple key-value stores with optional default values.
|
90
|
+
#
|
91
|
+
# If you need more complex translation, you can simply use `#map!`
|
92
|
+
# or its kin to work on the `accumulator` in a block
|
93
|
+
#
|
94
|
+
#
|
95
|
+
#
|
96
|
+
# # get a lousy language detection of any vernacular title
|
97
|
+
# require 'whatlanguage'
|
98
|
+
# wl = WhatLanguage.new(:all)
|
99
|
+
# to_field 'vernacular_language', extract_marc('245', :alternate_script=>:only) do |rec, acc|
|
100
|
+
# # accumulator is already filled with the values of any 880s that reference a 245 because
|
101
|
+
# # of the call to #extract_marc
|
102
|
+
# acc.map! {|x| wl.language(x) }
|
103
|
+
# acc.uniq!
|
104
|
+
# end
|
105
|
+
# Within the block, you may also be interested in using:
|
106
|
+
# * a case-insensitive hash, perhaps like [this one](https://github.com/junegunn/insensitive_hash)
|
107
|
+
# * a [MatchMap](https://github.com/billdueber/match_map), which implements pattern-matching logic similar to solrmarc's pattern files
|
108
|
+
class TranslationMap
  # Finds translation map definition files on the $LOAD_PATH, loads them,
  # and memoizes the (frozen) result per requested path.
  class Cache
    def initialize
      @cached = Hash.new
    end

    # Returns an actual Hash -- or nil if none found.
    # A nil (not found) result is cached too, until #reset_cache!
    def lookup(path)
      unless @cached.has_key?(path)
        @cached[path] = _lookup!(path)
      end
      return @cached[path]
    end

    # force lookup, without using cache.
    # used by cache. Returns the actual hash.
    # Returns nil if none found.
    # May raise on syntax error in file being loaded.
    #
    # For each $LOAD_PATH entry, in order, looks in its `translation_maps`
    # subdir for `path`.rb, then .yaml, then .properties; first hit wins.
    def _lookup!(path)
      found = nil

      $LOAD_PATH.each do |base|
        rb_file   = File.join(base, "translation_maps", "#{path}.rb")
        yaml_file = File.join(base, "translation_maps", "#{path}.yaml")
        prop_file = File.join(base, "translation_maps", "#{path}.properties")

        # File.exist? rather than File.exists?, which newer rubies removed.
        if File.exist? rb_file
          # NOTE: the ruby file is eval'd -- never point this at untrusted content.
          # File.read closes the file handle, unlike File.open(...).read.
          found = eval(File.read(rb_file), binding, rb_file)
          break
        elsif File.exist? yaml_file
          found = YAML.load_file(yaml_file)
          break
        elsif File.exist? prop_file
          found = TranslationMap.read_properties(prop_file)
          break
        end
      end

      # Cached hash can't be mutated without weird consequences, let's
      # freeze it!
      found.freeze if found

      return found
    end

    def reset_cache!
      @cached.clear
    end
  end

  # NOTE(review): this reader shadows Object#hash, so instances are not
  # usable as Hash keys; kept because it is part of the public interface.
  attr_reader :hash
  attr_reader :default

  class << self
    attr_accessor :cache

    def reset_cache!
      cache.reset_cache!
    end
  end
  self.cache = Cache.new

  # defn may be:
  #  * a Hash, used directly (not copied)
  #  * another TranslationMap, whose mappings and default are copied
  #  * anything else, treated as a name to look up via the cache in a
  #    `translation_maps` subdir of the load path; raises NotFound if
  #    no definition file is found.
  #
  # options[:default], if truthy, overrides any "__default__" key in the
  # definition. It may be a literal default value, "__passthrough__", or
  # :passthrough.
  def initialize(defn, options = {})
    @default = nil

    if defn.kind_of? Hash
      @hash = defn
    elsif defn.kind_of? self.class
      @hash    = defn.to_hash
      @default = defn.default
    else
      @hash = self.class.cache.lookup(defn)
      raise NotFound.new(defn) if @hash.nil?
    end

    if options[:default]
      @default = options[:default]
    elsif @hash.has_key? "__default__"
      @default = @hash["__default__"]
    end
  end

  # Look up key. If key is not mapped and a default is set, returns the key
  # itself for a passthrough default ("__passthrough__" or :passthrough),
  # otherwise the default value. With no default, returns whatever the
  # underlying hash returns (normally nil for an unmapped key).
  def [](key)
    if self.default && (! @hash.has_key?(key))
      if self.default == "__passthrough__" || self.default == :passthrough
        return key
      else
        return self.default
      end
    end

    @hash[key]
  end
  alias_method :map, :[]

  # Returns a dup of internal hash, dup so you can modify it
  # if you like. The special "__default__" key is removed from the copy.
  def to_hash
    dup = @hash.dup
    dup.delete("__default__")
    dup
  end

  # Run every element of an array through this translation map,
  # return the resulting array. If translation map returns nil,
  # original element will be missing from output.
  #
  # If an input maps to an array, each element of the array will be flattened
  # into the output.
  #
  # If an input maps to nil, it will cause the input element to be removed
  # entirely.
  def translate_array(array)
    array.each_with_object([]) do |input_element, output_array|
      output_element = self.map(input_element)
      if output_element.kind_of? Array
        output_array.concat output_element
      elsif ! output_element.nil?
        output_array << output_element
      end
    end
  end

  # Like #translate_array, but replaces the contents of the passed-in array
  # in place (and returns it).
  def translate_array!(array)
    array.replace( self.translate_array(array))
  end

  # Return a new TranslationMap that results from merging argument on top of self.
  # Can be useful for taking an existing translation map, but merging a few
  # overrides on top.
  #
  #     merged_map = TranslationMap.new(something).merge TranslationMap.new(else)
  #     #...
  #     merged_map.translate_array(something) # etc
  #
  # If a default is set in the second map, it will merge over the first too.
  #
  # You can also pass in a plain hash as an arg, instead of an existing TranslationMap:
  #
  #     TranslationMap.new(something).merge("overridden_key" => "value", "a" => "")
  def merge(other_map)
    # For a plain Hash argument, #default here is Hash#default (nil unless
    # the hash was built with a default value).
    default = other_map.default || self.default
    TranslationMap.new(self.to_hash.merge(other_map.to_hash), :default => default)
  end

  # Raised by #initialize when no definition file can be found for a name.
  # NOTE(review): inherits from Exception rather than StandardError, so a
  # bare `rescue` will not catch it; left unchanged for compatibility.
  class NotFound < Exception
    def initialize(path)
      super("No translation map definition file found at 'translation_maps/#{path}.[rb|yaml|properties]' in load path: #{$LOAD_PATH}")
    end
  end

  # We use dot-properties gem for reading .properties files,
  # return a hash.
  #
  # This is a public class method (Cache calls it with an explicit receiver).
  def self.read_properties(file_name)
    return DotProperties.load(file_name).to_h
  end
end
|
267
|
+
end
|
data/lib/traject/util.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'traject'
|
2
|
+
|
3
|
+
module Traject
|
4
|
+
# Just some internal utility methods
|
5
|
+
module Util

  # Formats an exception as a short multi-line log message: the class and
  # message, then the first backtrace line when there is one.
  #
  # If the exception responds to #getRootCause and that root cause is a
  # different object, a "Caused by" section with the root cause's class,
  # message and first backtrace line (when present) is appended.
  #
  # Safe to call on exceptions that were never raised (nil backtrace) and
  # on anonymous exception classes (nil class name).
  def self.exception_to_log_message(e)
    indent = " "

    msg = "#{indent}Exception: #{e.class.name}: #{e.message}\n"
    backtrace  = e.backtrace
    first_line = backtrace && backtrace.first
    msg << "#{indent}#{first_line}\n" if first_line

    if (e.respond_to?(:getRootCause) && e.getRootCause && e != e.getRootCause )
      caused_by = e.getRootCause
      msg << "#{indent}Caused by\n"
      msg << "#{indent}#{caused_by.class.name}: #{caused_by.message}\n"
      cause_backtrace = caused_by.backtrace
      cause_line      = cause_backtrace && cause_backtrace.first
      msg << "#{indent}#{cause_line}\n" if cause_line
    end

    return msg
  end

  # From ruby #caller method, you get an array. Pass one line
  # of the array here, get just file and line number out.
  #
  # Handles both the older "file:line:in `meth'" format and the newer
  # "file:line:in 'Klass#meth'" format. A line with no ":in" part is
  # returned unchanged.
  def self.extract_caller_location(str)
    str.split(/:in [`']/).first
  end

  # Ruby stdlib queue lacks a 'drain' function, we write one.
  #
  # Removes everything currently in the ruby stdlib queue, and returns
  # it as an array. Should be concurrent-safe, but queue may still have
  # some things in it after drain, if there are concurrent writers.
  def self.drain_queue(queue)
    result = []

    queue_size = queue.size
    begin
      queue_size.times do
        # Any truthy argument makes #deq non-blocking: it raises
        # ThreadError instead of waiting when the queue is empty.
        result << queue.deq(:raise_if_empty)
      end
    rescue ThreadError
      # Need do nothing, queue was concurrently popped, no biggie
    end

    return result
  end

end
|
52
|
+
end
|