traject 2.0.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +27 -0
- data/.yardopts +3 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +20 -0
- data/README.md +461 -0
- data/Rakefile +21 -0
- data/bench/bench.rb +30 -0
- data/bin/traject +16 -0
- data/doc/batch_execution.md +243 -0
- data/doc/extending.md +190 -0
- data/doc/indexing_rules.md +265 -0
- data/doc/other_commands.md +47 -0
- data/doc/settings.md +101 -0
- data/lib/tasks/load_maps.rake +48 -0
- data/lib/traject.rb +11 -0
- data/lib/traject/command_line.rb +301 -0
- data/lib/traject/csv_writer.rb +34 -0
- data/lib/traject/debug_writer.rb +47 -0
- data/lib/traject/delimited_writer.rb +110 -0
- data/lib/traject/indexer.rb +613 -0
- data/lib/traject/indexer/settings.rb +110 -0
- data/lib/traject/json_writer.rb +51 -0
- data/lib/traject/line_writer.rb +63 -0
- data/lib/traject/macros/basic.rb +9 -0
- data/lib/traject/macros/marc21.rb +223 -0
- data/lib/traject/macros/marc21_semantics.rb +584 -0
- data/lib/traject/macros/marc_format_classifier.rb +197 -0
- data/lib/traject/marc_extractor.rb +410 -0
- data/lib/traject/marc_reader.rb +89 -0
- data/lib/traject/mock_reader.rb +97 -0
- data/lib/traject/ndj_reader.rb +40 -0
- data/lib/traject/null_writer.rb +22 -0
- data/lib/traject/qualified_const_get.rb +40 -0
- data/lib/traject/solr_json_writer.rb +277 -0
- data/lib/traject/thread_pool.rb +161 -0
- data/lib/traject/translation_map.rb +267 -0
- data/lib/traject/util.rb +52 -0
- data/lib/traject/version.rb +3 -0
- data/lib/traject/yaml_writer.rb +9 -0
- data/lib/translation_maps/lcc_top_level.yaml +26 -0
- data/lib/translation_maps/marc_genre_007.yaml +9 -0
- data/lib/translation_maps/marc_genre_leader.yaml +22 -0
- data/lib/translation_maps/marc_geographic.yaml +589 -0
- data/lib/translation_maps/marc_instruments.yaml +102 -0
- data/lib/translation_maps/marc_languages.yaml +490 -0
- data/test/debug_writer_test.rb +38 -0
- data/test/delimited_writer_test.rb +104 -0
- data/test/indexer/each_record_test.rb +59 -0
- data/test/indexer/macros_marc21_semantics_test.rb +391 -0
- data/test/indexer/macros_marc21_test.rb +190 -0
- data/test/indexer/macros_test.rb +40 -0
- data/test/indexer/map_record_test.rb +209 -0
- data/test/indexer/read_write_test.rb +101 -0
- data/test/indexer/settings_test.rb +152 -0
- data/test/indexer/to_field_test.rb +77 -0
- data/test/marc_extractor_test.rb +412 -0
- data/test/marc_format_classifier_test.rb +98 -0
- data/test/marc_reader_test.rb +110 -0
- data/test/solr_json_writer_test.rb +248 -0
- data/test/test_helper.rb +90 -0
- data/test/test_support/245_no_ab.marc +1 -0
- data/test/test_support/880_with_no_6.utf8.marc +1 -0
- data/test/test_support/bad_subfield_code.marc +1 -0
- data/test/test_support/bad_utf_byte.utf8.marc +1 -0
- data/test/test_support/date_resort_to_260.marc +1 -0
- data/test/test_support/date_type_r_missing_date2.marc +1 -0
- data/test/test_support/date_with_u.marc +1 -0
- data/test/test_support/demo_config.rb +155 -0
- data/test/test_support/emptyish_record.marc +1 -0
- data/test/test_support/escaped_character_reference.marc8.marc +1 -0
- data/test/test_support/george_eliot.marc +1 -0
- data/test/test_support/hebrew880s.marc +1 -0
- data/test/test_support/louis_armstrong.marc +1 -0
- data/test/test_support/manufacturing_consent.marc +1 -0
- data/test/test_support/manuscript_online_thesis.marc +1 -0
- data/test/test_support/microform_online_conference.marc +1 -0
- data/test/test_support/multi_era.marc +1 -0
- data/test/test_support/multi_geo.marc +1 -0
- data/test/test_support/musical_cage.marc +1 -0
- data/test/test_support/nature.marc +1 -0
- data/test/test_support/one-marc8.mrc +1 -0
- data/test/test_support/online_only.marc +1 -0
- data/test/test_support/packed_041a_lang.marc +1 -0
- data/test/test_support/test_data.utf8.json +30 -0
- data/test/test_support/test_data.utf8.marc.xml +2609 -0
- data/test/test_support/test_data.utf8.mrc +1 -0
- data/test/test_support/test_data.utf8.mrc.gz +0 -0
- data/test/test_support/the_business_ren.marc +1 -0
- data/test/translation_map_test.rb +225 -0
- data/test/translation_maps/bad_ruby.rb +8 -0
- data/test/translation_maps/bad_yaml.yaml +1 -0
- data/test/translation_maps/both_map.rb +1 -0
- data/test/translation_maps/both_map.yaml +1 -0
- data/test/translation_maps/default_literal.rb +10 -0
- data/test/translation_maps/default_passthrough.rb +10 -0
- data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
- data/test/translation_maps/properties_map.properties +5 -0
- data/test/translation_maps/ruby_map.rb +10 -0
- data/test/translation_maps/translate_array_test.yaml +8 -0
- data/test/translation_maps/yaml_map.yaml +7 -0
- data/traject.gemspec +47 -0
- metadata +382 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
require 'concurrent'
|
|
2
|
+
require 'thread' # for Queue
|
|
3
|
+
|
|
4
|
+
module Traject
  # An abstraction wrapping a Concurrent::ThreadPoolExecutor in some configuration
  # choices and other apparatus. The underlying executor is a Java ThreadPool
  # executor on jruby for performance, and is ruby-concurrent's own ruby
  # implementation otherwise.
  #
  # 1) Initialize with chosen pool size -- we create fixed size pools, where
  #    core and max sizes are the same.
  #
  # 2) If initialized with nil or 0 for threadcount, no thread pool will actually
  #    be created, and work sent to the Traject::ThreadPool will just be executed
  #    in the caller thread. We call this a nil threadpool. One situation it can be
  #    useful is if you are running under MRI, where multi-core parallelism isn't
  #    available, so an actual threadpool may not be useful. (Although in some cases
  #    a thread pool, especially one with size 1, can be useful in MRI for I/O
  #    blocking operations)
  #
  # 3) Use the #maybe_in_thread_pool method to send blocks to the thread pool for
  #    execution -- if configured with a nil threadcount, your block will just be
  #    executed in the calling thread. Be careful to not refer to any non-local
  #    variables in the block, unless the variable has an object you can
  #    use thread-safely!
  #
  # 4) We configure our underlying Concurrent::ThreadPoolExecutor
  #    with a work queue that will buffer up to (pool_size*3) tasks. If the queue is
  #    full, the underlying executor is set up to use the :caller_runs policy,
  #    meaning the block will end up executing in the caller's own thread. With the
  #    kind of work we're doing, where each unit of work is small and there are many
  #    of them -- the :caller_runs policy serves as an effective 'back pressure'
  #    mechanism to keep the work queue from getting too large and exhausting
  #    memory, when producers are faster than consumers.
  #
  # 5) Any exceptions raised by pool-executed work are captured and accumulated in a
  #    thread-safe manner, and can be re-raised in the thread of your choice by
  #    calling #raise_collected_exception!
  #
  # 6) When you are done with the threadpool, you can and must call
  #    #shutdown_and_wait, which will wait for all current queued work
  #    to complete, then return. You can not give any more work to the pool
  #    after you do this. By default it'll wait pretty much forever, which should
  #    be fine. If you never call shutdown, then queued or in-progress work
  #    may be abandoned when the program ends, which would be bad.
  class ThreadPool
    # Both readers return nil for a null (no-op) pool.
    attr_reader :pool_size, :queue_capacity

    # First arg is pool size, 0 or nil and we'll be a null/no-op pool which executes
    # work in caller thread.
    #
    # The size is coerced with #to_i *before* anything else is derived from it,
    # so a value that arrives as a String (e.g. "3") is sized as the
    # integer 3, and "0" is treated the same as 0.
    def initialize(pool_size)
      size = pool_size.to_i
      return if size == 0

      @pool_size      = size
      @queue_capacity = size * 3

      @thread_pool = Concurrent::ThreadPoolExecutor.new(
        :min_threads     => @pool_size,
        :max_threads     => @pool_size,
        :max_queue       => @queue_capacity,
        :fallback_policy => :caller_runs
      )

      # A thread-safe queue to collect exceptions cross-threads.
      # We really only need to save the first exception, but a queue
      # is a convenient way to store a value concurrency-safely, and
      # might as well store all of them.
      @exceptions_caught_queue = Queue.new
    end

    # Pass it a block, MAYBE gets executed in the bg in a thread pool. Maybe
    # gets executed in the calling thread.
    #
    # There are actually two 'maybes':
    #
    # * If Traject::ThreadPool was configured with null thread pool, then ALL
    #   work will be executed in calling thread.
    #
    # * If there is a thread pool, but its work queue is full, then a job
    #   will be executed in calling thread (because we configured our
    #   thread pool with a limited sized queue, and the :caller_runs fallback policy)
    #
    # You can pass arbitrary arguments to the method, that will then be passed
    # to your block -- similar to how ruby Thread.new works. This is convenient
    # for creating variables unique to the block that won't be shared outside
    # the thread:
    #
    #     thread_pool.maybe_in_thread_pool(x, y) do |x1, y1|
    #       100.times do
    #         something_with(x1)
    #       end
    #     end
    #     x = "something else"
    #     # If we hadn't passed args with block, and had just
    #     # used x in the block, it'd be the SAME x as this one,
    #     # and would be pointing to a different string now!
    #
    # Note, that just makes block-local variables, it doesn't
    # help you with whether a data structure itself is thread safe.
    #
    # With a null pool the block's own value is returned (and any exception it
    # raises propagates directly); with a real pool the return value is whatever
    # the executor's #post returns.
    def maybe_in_thread_pool(*args)
      # Null pool: run the work right here in the caller.
      return yield(*args) unless @thread_pool

      @thread_pool.post do
        begin
          yield(*args)
        rescue Exception => e
          # Deliberately broad: nothing raised by the block may escape into the
          # executor; it is parked for #raise_collected_exception! instead.
          collect_exception(e)
        end
      end
    end

    # thread-safe way of storing an exception, to raise
    # later in a different thread. Every exception passed in is pushed onto
    # the queue; #raise_collected_exception! takes them off one per call.
    def collect_exception(e)
      @exceptions_caught_queue.push(e)
    end

    # If there's a stored collected exception, raise it
    # again now. Call this to re-raise exceptions caught in
    # other threads in the thread of your choice.
    #
    # If you call this method on a ThreadPool initialized with nil
    # as a non-functioning threadpool -- then this method is just
    # a no-op.
    def raise_collected_exception!
      return nil if @exceptions_caught_queue.nil?

      begin
        # Non-blocking pop: a separate empty?-then-pop could block forever if
        # another thread emptied the queue in between the two calls.
        collected = @exceptions_caught_queue.pop(true)
      rescue ThreadError
        # Nothing has been collected.
        return nil
      end

      raise collected
    end

    # shutdown threadpool, and wait for all work to complete.
    # this one is also a no-op if you have a null ThreadPool that
    # doesn't really have a threadpool at all.
    #
    # returns elapsed time in seconds it took to shutdown
    def shutdown_and_wait
      start_t = Time.now

      if @thread_pool
        @thread_pool.shutdown
        @thread_pool.wait_for_termination
      end

      return (Time.now - start_t)
    end

  end
end
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
require 'traject'
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
require 'dot-properties'
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
module Traject
  # A TranslationMap is basically just something that has a hash-like #[]
  # method to map from input strings to output strings:
  #
  #     translation_map["some_input"] #=> some_output
  #
  # Input is assumed to always be string, output is either string
  # or array of strings.
  #
  # What makes it more useful than a stunted hash is its ability to load
  # the hash definitions from configuration files, either pure ruby,
  # yaml, or java .properties file (not all .properties features may
  # be supported, we use dot-properties gem for reading)
  #
  # traject's `extract_marc` macro allows you to specify a :translation_map=>filename argument
  # that will automatically find and use a translation map on the resulting data:
  #
  #     extract_marc("040a", :translation_map => "languages")
  #
  # Or you can always create one yourself and use it how you like:
  #
  #     map = TranslationMap.new("languages")
  #
  # In either case, TranslationMap will look for a file named, in that example,
  # `languages.rb` or `languages.yaml` or `languages.properties`,
  # somewhere in the ruby $LOAD_PATH in a `/translation_maps` subdir.
  #
  # * Also looks for "/translation_maps" subdir in load paths, so
  #   for instance you can have a gem that keeps translation maps
  #   in ./lib/translation_maps, and it Just Works.
  #
  # * Note you do NOT supply the .rb, .yaml, or .properties suffix yourself,
  #   it'll use whichever it finds (allows calling code to not care which is used).
  #
  # Ruby files just need to have their last line eval to a hash. The file
  # will be run through `eval`, don't do it with untrusted content (naturally)
  #
  # You can also pass in a Hash for consistency to TranslationMap.new, although
  # I don't know why you'd want to.
  #
  # ## Special default handling
  #
  # The key "__default__" in the hash is treated specially. If set to a string,
  # that string will be returned by the TranslationMap for any input not otherwise
  # included. If set to the special string "__passthrough__", then for input not
  # mapped, the original input string will be returned.
  #
  # This is most useful for YAML definition files, if you are using an actual ruby
  # hash, you could just set the hash to do what you want using Hash#default_proc
  # etc.
  #
  # Or, when calling TranslationMap.new(), you can pass in options over-riding special
  # key too:
  #
  #     TranslationMap.new("something", :default => "foo")
  #     TranslationMap.new("something", :default => :passthrough)
  #
  # ## Output: String or array of strings
  #
  # The output can be a string or an array of strings, or nil. It should not be anything else.
  # When used with the #translate_array! method, one string can be replaced by multiple values
  # (array of strings) or removed (nil)
  #
  # There's no way to specify multiple return values in a .properties, use .yaml or .rb for that.
  #
  # ## Caching
  #
  # Lookup and loading of configuration files will be cached, for efficiency.
  # You can reset with `TranslationMap.reset_cache!`
  #
  # ## YAML example:
  #
  #     key: value
  #     key2: value2 multiple words fine
  #     key2b: "Although you can use quotes if you want: Or need."
  #     key3:
  #       - array
  #       - of
  #       - values look like this
  #
  # ## Alternatives
  # `Traject::TranslationMap` provides an easy way to deal with the most common translation case:
  # simple key-value stores with optional default values.
  #
  # If you need more complex translation, you can simply use `#map!`
  # or its kin to work on the `accumulator` in a block
  #
  #     # get a lousy language detection of any vernacular title
  #     require 'whatlanguage'
  #     wl = WhatLanguage.new(:all)
  #     to_field 'vernacular_language', extract_marc('245', :alternate_script=>:only) do |rec, acc|
  #       # accumulator is already filled with the values of any 880s that reference a 245 because
  #       # of the call to #extract_marc
  #       acc.map! {|x| wl.language(x) }
  #       acc.uniq!
  #     end
  #
  # Within the block, you may also be interested in using:
  # * a case-insensitive hash, perhaps like [this one](https://github.com/junegunn/insensitive_hash)
  # * a [MatchMap](https://github.com/billdueber/match_map), which implements pattern-matching logic similar to solrmarc's pattern files
  class TranslationMap
    # Memoizes the result of searching the $LOAD_PATH for a translation map
    # definition file, keyed by the (suffix-less) name that was asked for.
    class Cache
      def initialize
        @cached = Hash.new
      end

      # Returns an actual Hash -- or nil if none found.
      # A miss is remembered too (nil is stored under the key), so the load
      # path is only searched once per name until #reset_cache! is called.
      def lookup(path)
        unless @cached.has_key?(path)
          @cached[path] = _lookup!(path)
        end
        return @cached[path]
      end

      # force lookup, without using cache.
      # used by cache. Returns the actual hash.
      # Returns nil if none found.
      # May raise on syntax error in file being loaded.
      #
      # Each $LOAD_PATH entry is checked in order; within one entry the
      # preference is .rb, then .yaml, then .properties. The first file found
      # wins and the search stops.
      def _lookup!(path)
        found = nil

        $LOAD_PATH.each do |base|
          rb_file   = File.join( base, "translation_maps", "#{path}.rb" )
          yaml_file = File.join( base, "translation_maps", "#{path}.yaml" )
          prop_file = File.join( base, "translation_maps", "#{path}.properties" )

          # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
          if File.exist? rb_file
            # The file is eval'ed as ruby: only use with trusted content.
            # File.read closes the handle, unlike File.open(...).read.
            found = eval( File.read(rb_file), binding, rb_file )
            break
          elsif File.exist? yaml_file
            found = YAML.load_file(yaml_file)
            break
          elsif File.exist? prop_file
            found = Traject::TranslationMap.read_properties(prop_file)
            break
          end
        end

        # Cached hash can't be mutated without weird consequences, let's
        # freeze it!
        found.freeze if found

        return found
      end

      # Forget everything looked up so far, hits and misses alike.
      def reset_cache!
        @cached.clear
      end

    end

    # The underlying Hash (may still contain the "__default__" key; see #to_hash).
    attr_reader :hash
    # The default in effect, or nil when there is none.
    attr_reader :default

    class << self
      # The shared Cache used by every TranslationMap.new(name) lookup.
      attr_accessor :cache

      def reset_cache!
        cache.reset_cache!
      end
    end
    self.cache = Cache.new

    # defn may be:
    #
    # * a Hash, used as-is (not copied);
    # * another TranslationMap, whose mappings and default are taken over;
    # * anything else, treated as the name of a definition file to be found
    #   via the class-level cache. Raises NotFound if no such file exists.
    #
    # options[:default], when truthy, takes precedence over a "__default__"
    # key in the hash.
    def initialize(defn, options = {})
      if defn.kind_of? Hash
        @hash = defn
      elsif defn.kind_of? self.class
        @hash    = defn.to_hash
        @default = defn.default
      else
        @hash = self.class.cache.lookup(defn)
        raise NotFound.new(defn) if @hash.nil?
      end

      if options[:default]
        @default = options[:default]
      elsif @hash.has_key? "__default__"
        @default = @hash["__default__"]
      end
    end

    # Look up one key. For a key that is not in the hash: returns the key
    # itself when the default is passthrough ("__passthrough__", or the
    # symbol :passthrough as documented on the :default option), the default
    # value when some other default is set, and nil otherwise.
    def [](key)
      if self.default && (! @hash.has_key?(key))
        if self.default == "__passthrough__" || self.default == :passthrough
          return key
        else
          return self.default
        end
      end

      @hash[key]
    end
    alias_method :map, :[]

    # Returns a dup of internal hash, dup so you can modify it
    # if you like. The special "__default__" key is left out of the copy.
    # The copy is shallow: array values are shared with the original.
    def to_hash
      copy = @hash.dup
      copy.delete("__default__")
      copy
    end

    # Run every element of an array through this translation map,
    # return the resulting array. If translation map returns nil,
    # original element will be missing from output.
    #
    # If an input maps to an array, each element of the array will be flattened
    # into the output.
    #
    # If an input maps to nil, it will cause the input element to be removed
    # entirely.
    def translate_array(array)
      array.each_with_object([]) do |input_element, output_array|
        output_element = self.map(input_element)
        if output_element.kind_of? Array
          output_array.concat output_element
        elsif ! output_element.nil?
          output_array << output_element
        end
      end
    end

    # In-place variant of #translate_array: replaces the contents of the
    # array passed in, and returns that same array.
    def translate_array!(array)
      array.replace( self.translate_array(array))
    end

    # Return a new TranslationMap that results from merging argument on top of self.
    # Can be useful for taking an existing translation map, but merging a few
    # overrides on top.
    #
    #     merged_map = TranslationMap.new(something).merge TranslationMap.new(else)
    #     #...
    #     merged_map.translate_array(something) # etc
    #
    # If a default is set in the second map, it will merge over the first too.
    #
    # You can also pass in a plain hash as an arg, instead of an existing TranslationMap:
    #
    #     TranslationMap.new(something).merge("overridden_key" => "value", "a" => "")
    #
    # (A plain Hash works because it also responds to #default and #to_hash.)
    def merge(other_map)
      default = other_map.default || self.default
      TranslationMap.new(self.to_hash.merge(other_map.to_hash), :default => default)
    end

    # Raised by TranslationMap.new when no definition file can be found for
    # the requested name.
    #
    # NOTE(review): this inherits from Exception rather than StandardError, so
    # a bare `rescue` will not catch it. Left as-is because changing the
    # superclass would change what existing rescue clauses catch.
    class NotFound < Exception
      def initialize(path)
        super("No translation map definition file found at 'translation_maps/#{path}.[rb|yaml|properties]' in load path: #{$LOAD_PATH}")
      end
    end

    # We use dot-properties gem for reading .properties files,
    # return a hash.
    #
    # This is a public class method: Cache calls it with an explicit receiver.
    def self.read_properties(file_name)
      return DotProperties.load(file_name).to_h
    end

  end
end
|
data/lib/traject/util.rb
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require 'traject'
|
|
2
|
+
|
|
3
|
+
module Traject
  # Just some internal utility methods
  module Util

    # Build a short, indented, multi-line description of an exception for
    # logging: its class and message, then the first line of its backtrace.
    #
    # If the exception responds to #getRootCause and that returns a different,
    # non-nil object, a "Caused by" section describing it in the same way is
    # appended.
    #
    # An exception without a backtrace (e.g. one that was constructed but never
    # raised) is reported without a backtrace line, rather than raising.
    def self.exception_to_log_message(e)
      indent = " "

      msg = indent + "Exception: " + e.class.name.to_s + ": " + e.message + "\n"
      first_line = Array(e.backtrace).first
      msg += indent + first_line + "\n" if first_line

      if (e.respond_to?(:getRootCause) && e.getRootCause && e != e.getRootCause )
        caused_by = e.getRootCause
        msg += indent + "Caused by\n"
        msg += indent + caused_by.class.name.to_s + ": " + caused_by.message + "\n"
        cause_line = Array(caused_by.backtrace).first
        msg += indent + cause_line + "\n" if cause_line
      end

      return msg
    end

    # From ruby #caller method, you get an array. Pass one line
    # of the array here, get just file and line number out.
    #
    # Handles both the "file:line:in `meth'" form and the
    # "file:line:in 'meth'" form, where the method name is introduced by a
    # plain single quote instead of a backtick.
    def self.extract_caller_location(str)
      str.split(/:in [`']/).first
    end

    # Ruby stdlib queue lacks a 'drain' function, we write one.
    #
    # Removes everything currently in the ruby stdlib queue, and returns
    # it as an array. Should be concurrent-safe, but queue may still have
    # some things in it after drain, if there are concurrent writers.
    def self.drain_queue(queue)
      result = []

      # Only take as many items as were present when we started, so a busy
      # producer can't keep us here indefinitely.
      queue_size = queue.size
      begin
        queue_size.times do
          # Non-blocking pop: raises ThreadError instead of waiting when empty.
          result << queue.pop(true)
        end
      rescue ThreadError
        # Need do nothing, queue was concurrently popped, no biggie
      end

      return result
    end

  end
end
|