omelette 0.0.1a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.travis.yml +6 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +75 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/omelette +5 -0
- data/lib/omelette.rb +10 -0
- data/lib/omelette/command_line.rb +10 -0
- data/lib/omelette/importer.rb +284 -0
- data/lib/omelette/importer/context.rb +42 -0
- data/lib/omelette/importer/errors.rb +40 -0
- data/lib/omelette/importer/settings.rb +106 -0
- data/lib/omelette/importer/steps.rb +64 -0
- data/lib/omelette/macros/xpath.rb +14 -0
- data/lib/omelette/null_writer.rb +20 -0
- data/lib/omelette/omeka_json_writer.rb +9 -0
- data/lib/omelette/thread_pool.rb +161 -0
- data/lib/omelette/util.rb +146 -0
- data/lib/omelette/version.rb +3 -0
- data/lib/omelette/xml_reader.rb +27 -0
- data/omelette.gemspec +36 -0
- metadata +213 -0
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'rest-client'
|
2
|
+
|
3
|
+
module Omelette
  # Stateless helper functions: exception/backtrace formatting for log
  # output, draining a queue, and building the Omeka element lookup table.
  # Every method is exposed through +module_function+, so it can be called
  # as +Omelette::Util.method_name+.
  module Util
    # Formats an exception as a short, indented, multi-line log message.
    #
    # The message holds the exception class and message, then the first
    # backtrace line when one is available. If the exception responds to
    # +getRootCause+ (presumably a JRuby-wrapped Java exception -- confirm)
    # and reports a root cause other than itself, the same details are
    # appended for that root cause.
    #
    # @param e [Exception] the exception to describe
    # @return [String] the newline-terminated message
    def exception_to_log_message(e)
      indent = ' '

      msg = "#{indent}Exception: #{e.class.name}: #{e.message}\n"
      # Exception#backtrace is nil for an exception that was never raised,
      # so the backtrace line is only emitted when a frame exists.
      first_frame = Array(e.backtrace).first
      msg += "#{indent}#{first_frame}\n" if first_frame

      caused_by = e.respond_to?(:getRootCause) ? e.getRootCause : nil
      if caused_by && e != caused_by
        msg += "#{indent}Caused by\n"
        msg += "#{indent}#{caused_by.class.name}: #{caused_by.message}\n"
        cause_frame = Array(caused_by.backtrace).first
        msg += "#{indent}#{cause_frame}\n" if cause_frame
      end

      msg
    end
    module_function :exception_to_log_message

    # From ruby #caller method, you get an array. Pass one line
    # of the array here, get just file and line number out.
    #
    # Handles both the older "file:line:in `meth'" format and the
    # "file:line:in 'meth'" format (opening single quote) of newer rubies.
    #
    # @param str [String] one backtrace/caller line
    # @return [String, nil] the "file:line" prefix; nil for an empty string
    def extract_caller_location(str)
      str.split(/:in [`']/).first
    end
    module_function :extract_caller_location

    # Provide a config source file path, and an exception.
    #
    # Returns the line number of the first line in the backtrace of the
    # exception that matches that file_path.
    #
    # Returns `nil` if no suitable backtrace line can be found, including
    # when the exception has no backtrace at all.
    #
    # Has special logic to try and grep the info out of a SyntaxError, bah.
    #
    # @param file_path [String] path of the config file to look for
    # @param exception [Exception] the exception whose backtrace is searched
    # @return [Integer, nil]
    def backtrace_lineno_for_config(file_path, exception)
      # For a SyntaxError, we really need to grep it from the
      # exception message, it really appears to be nowhere else. Ugh.
      if exception.is_a?(SyntaxError)
        m = /:(\d+):/.match(exception.message)
        return m[1].to_i if m
      end

      # Otherwise we try to fish it out of the backtrace, first
      # line matching the config file path.

      # exception.backtrace_locations exists in MRI 2.1+, which makes
      # our task a lot easier. But not yet in JRuby 1.7.x, so we got to
      # handle the old way of having to parse the strings in backtrace too.
      locations = exception.respond_to?(:backtrace_locations) ? exception.backtrace_locations : nil
      if locations && !locations.empty?
        location = locations.find { |bt| bt.path == file_path }
        return location ? location.lineno : nil
      end

      # Have to parse the string backtrace; it may be nil for an exception
      # that was never raised.
      Array(exception.backtrace).each do |line|
        next unless line.start_with?(file_path)

        m = /\A.*:(\d+):in/.match(line)
        return m[1].to_i if m
      end

      # if we got here, we have nothing
      nil
    end
    module_function :backtrace_lineno_for_config

    # Extract the part of the backtrace from its first frame down to, and
    # including, the first frame that mentions the config file. If we can't
    # find the config file in the stack trace (or there is no backtrace),
    # an empty array is returned.
    #
    # If the ruby supports Exception#backtrace_locations and they are
    # populated, the returned array will actually be of
    # Thread::Backtrace::Location elements; otherwise it holds Strings.
    #
    # @param file_path [String] path of the config file to look for
    # @param exception [Exception] the exception whose backtrace is filtered
    # @return [Array]
    def backtrace_from_config(file_path, exception)
      # MRI 2.1+ has exception.backtrace_locations which makes
      # this a lot easier, but JRuby 1.7.x doesn't yet, so we
      # need to do it both ways.
      locations = exception.respond_to?(:backtrace_locations) ? exception.backtrace_locations : nil

      if locations && !locations.empty?
        last = locations.index { |location| location.path == file_path }
        last ? locations[0..last] : []
      else
        lines = Array(exception.backtrace)
        last = lines.index { |line| line.start_with?(file_path) }
        last ? lines[0..last] : []
      end
    end
    module_function :backtrace_from_config

    # Ruby stdlib queue lacks a 'drain' function, we write one.
    #
    # Removes everything currently in the ruby stdlib queue, and returns
    # it as an array. Should be concurrent-safe, but queue may still have
    # some things in it after drain, if there are concurrent writers.
    #
    # @param queue [Queue] the queue to empty
    # @return [Array] the removed items, in dequeue order
    def drain_queue(queue)
      result = []

      begin
        # Any truthy argument makes #deq non-blocking: it raises ThreadError
        # instead of waiting when the queue is already empty.
        queue.size.times { result << queue.deq(:raise_if_empty) }
      rescue ThreadError
        # Need do nothing, queue was concurrently popped, no biggie
      end

      result
    end
    module_function :drain_queue

    # Builds a lookup table of element ids, keyed by element set name and
    # then by element name, from the +element_sets+ and +elements+ endpoints
    # under +api_root+.
    #
    # NOTE(review): one GET is issued per endpoint; if the API paginates
    # these collections, later pages are not read -- confirm.
    #
    # @param api_root [String] base URL of the API
    # @return [Hash{String => Hash}] element set name => { element name => element id }
    def build_elements_map(api_root)
      element_sets = JSON.parse(RestClient.get("#{api_root}/element_sets").body)
      set_names_by_id = element_sets.map { |s| [s['id'], s['name']] }.to_h
      elements_map = element_sets.map { |s| [s['name'], {}] }.to_h

      elements = JSON.parse(RestClient.get("#{api_root}/elements").body)
      elements.each do |element|
        element_set_name = set_names_by_id[element['element_set']['id']]
        elements_map[element_set_name][element['name']] = element['id']
      end

      elements_map
    end
    module_function :build_elements_map
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Reads a list of XML files and yields each one as a parsed Nokogiri
# document together with the file's base name.
class Omelette::XmlReader
  # +settings+ is the Omelette::Importer::Settings built in #initialize.
  # +folder+ is never assigned anywhere in this class, so it reads as nil;
  # the reader is kept so existing callers do not break.
  attr_reader :settings, :folder

  # @param settings [Hash] raw settings, wrapped in Omelette::Importer::Settings
  # @param file_paths [Array<String>] paths of the XML files to read
  def initialize(settings, file_paths)
    @settings = Omelette::Importer::Settings.new settings
    @file_paths = file_paths
  end

  # Returns the logger from the +:logger+ setting, or a Yell logger on
  # STDERR created with level "gt.fatal" when none is configured. The
  # result is memoized.
  def logger
    @logger ||= (@settings[:logger] || Yell.new(STDERR, :level => "gt.fatal"))
  end

  # Parses each file and yields +(xml_doc, basename)+ to the block.
  # Without a block, returns an Enumerator.
  #
  # Namespaces are stripped from the document when the
  # 'remove_xml_namespaces' setting stringifies to 'true'.
  #
  # Any StandardError raised while opening, parsing, or inside the caller's
  # block is logged for that file and iteration continues with the next one.
  def each
    return enum_for(:each) unless block_given?

    @file_paths.each do |file|
      begin
        # Block form closes the file handle as soon as parsing is done,
        # instead of leaking one open handle per input file.
        xml_doc = File.open(file) { |io| Nokogiri::XML(io) }
        xml_doc.remove_namespaces! if settings['remove_xml_namespaces'].to_s == 'true'
        yield xml_doc, File.basename(file)
      rescue => ex
        logger.error "Problem processing file #{file}: #{ex.message}"
      end
    end
  end
end
|
data/omelette.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
|
2
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require "omelette/version"
|
5
|
+
|
6
|
+
# Gem specification for omelette: identity, packaged files, and dependencies.
Gem::Specification.new do |spec|
  spec.name = "omelette"
  spec.version = Omelette::VERSION
  spec.authors = ["Dazhi Jiao"]
  spec.email = ["dazhi.jiao@gmail.com"]

  spec.summary = "A tool that imports data into Omeka using the Omeka API"
  # Adjacent string literals are concatenated verbatim, so every piece but
  # the last must end with a space to keep the sentences separated.
  spec.description = "Omelette is a Ruby gem that uses the Omeka API to import data. " \
                     "It has a DSL and can be used or extended to process almost any kinds of data. " \
                     "It was inspired by the traject gem."
  spec.homepage = "http://github.com/jiaola/omelette"
  spec.license = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "thor", "~> 0.20"
  spec.add_runtime_dependency "nokogiri", "~> 1.8"
  spec.add_runtime_dependency "rest-client", "~> 2.0"
  spec.add_runtime_dependency "concurrent-ruby", "~> 1.0"
  spec.add_runtime_dependency "hashie", "~> 3.5"
  spec.add_runtime_dependency "yell", "~> 2.0"
  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "webmock", "~> 3.0"
end
|
metadata
ADDED
@@ -0,0 +1,213 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: omelette
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1a
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Dazhi Jiao
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-11-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: thor
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.20'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.20'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rest-client
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: concurrent-ruby
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: hashie
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.5'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '3.5'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: yell
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '2.0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '2.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: bundler
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.16'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.16'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rake
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '10.0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '10.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rspec
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '3.0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '3.0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: webmock
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '3.0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '3.0'
|
153
|
+
description: Omelette is a Ruby gem that uses the Omeka API to import data. It has
|
154
|
+
a DSL and can be used or extended to process almost any kinds of data. It was inspired
|
155
|
+
by the traject gem.
|
156
|
+
email:
|
157
|
+
- dazhi.jiao@gmail.com
|
158
|
+
executables:
|
159
|
+
- omelette
|
160
|
+
extensions: []
|
161
|
+
extra_rdoc_files: []
|
162
|
+
files:
|
163
|
+
- ".gitignore"
|
164
|
+
- ".rspec"
|
165
|
+
- ".travis.yml"
|
166
|
+
- Gemfile
|
167
|
+
- Gemfile.lock
|
168
|
+
- LICENSE.txt
|
169
|
+
- README.md
|
170
|
+
- Rakefile
|
171
|
+
- bin/console
|
172
|
+
- bin/setup
|
173
|
+
- exe/omelette
|
174
|
+
- lib/omelette.rb
|
175
|
+
- lib/omelette/command_line.rb
|
176
|
+
- lib/omelette/importer.rb
|
177
|
+
- lib/omelette/importer/context.rb
|
178
|
+
- lib/omelette/importer/errors.rb
|
179
|
+
- lib/omelette/importer/settings.rb
|
180
|
+
- lib/omelette/importer/steps.rb
|
181
|
+
- lib/omelette/macros/xpath.rb
|
182
|
+
- lib/omelette/null_writer.rb
|
183
|
+
- lib/omelette/omeka_json_writer.rb
|
184
|
+
- lib/omelette/thread_pool.rb
|
185
|
+
- lib/omelette/util.rb
|
186
|
+
- lib/omelette/version.rb
|
187
|
+
- lib/omelette/xml_reader.rb
|
188
|
+
- omelette.gemspec
|
189
|
+
homepage: http://github.com/jiaola/omelette
|
190
|
+
licenses:
|
191
|
+
- MIT
|
192
|
+
metadata: {}
|
193
|
+
post_install_message:
|
194
|
+
rdoc_options: []
|
195
|
+
require_paths:
|
196
|
+
- lib
|
197
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - ">="
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0'
|
202
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
203
|
+
requirements:
|
204
|
+
- - ">"
|
205
|
+
- !ruby/object:Gem::Version
|
206
|
+
version: 1.3.1
|
207
|
+
requirements: []
|
208
|
+
rubyforge_project:
|
209
|
+
rubygems_version: 2.6.12
|
210
|
+
signing_key:
|
211
|
+
specification_version: 4
|
212
|
+
summary: A tool that imports data into Omeka using the Omeka API
|
213
|
+
test_files: []
|