transformator 0.1.4 → 1.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -2
- data/Rakefile +0 -43
- data/bin/transformator +4 -0
- data/lib/transformator.rb +13 -29
- data/lib/transformator/cli.rb +42 -0
- data/lib/transformator/dispatcher.rb +43 -0
- data/lib/transformator/filesystem_pattern_evaluator.rb +14 -0
- data/lib/transformator/gzip_reader.rb +0 -0
- data/lib/transformator/gzip_writer.rb +0 -0
- data/lib/transformator/null_processor.rb +9 -0
- data/lib/transformator/oga_xml_dumper.rb +10 -0
- data/lib/transformator/oga_xml_parser.rb +10 -0
- data/lib/transformator/ox_xml_dumper.rb +10 -0
- data/lib/transformator/ox_xml_parser.rb +10 -0
- data/lib/transformator/parallel_processor.rb +39 -0
- data/lib/transformator/processor.rb +4 -0
- data/lib/transformator/tar_reader.rb +51 -0
- data/lib/transformator/tar_reader/patched_rubygems_tar_reader.rb +41 -0
- data/lib/transformator/tar_writer.rb +0 -0
- data/lib/transformator/version.rb +1 -1
- data/spec/spec_helper.rb +0 -1
- data/spec/transformator_spec.rb +0 -8
- data/transformator.gemspec +7 -6
- data/ubpb.yml.erb +25 -0
- metadata +49 -67
- data/assets/primo_search_response.xml +0 -2878
- data/assets/primo_search_response_1.xml +0 -2467
- data/examples/primo_search_response_transformation.rb +0 -123
- data/examples/search_request_transformation.rb +0 -89
- data/lib/transformator/dsl.rb +0 -93
- data/lib/transformator/format_converter.rb +0 -27
- data/lib/transformator/format_converter/document_from_hash.rb +0 -13
- data/lib/transformator/format_converter/document_from_object.rb +0 -20
- data/lib/transformator/format_converter/document_from_xml.rb +0 -28
- data/lib/transformator/format_converter/hash_from_document.rb +0 -61
- data/lib/transformator/format_converter/xml_from_document.rb +0 -7
- data/lib/transformator/transformation.rb +0 -91
- data/spec/examples/primo_search_response_transformation_spec.rb +0 -19
- data/spec/examples/search_request_transformation_spec.rb +0 -48
- data/spec/transformator/dsl_spec.rb +0 -187
- data/spec/transformator/format_converter/hash_from_document_spec.rb +0 -42
- data/spec/transformator/transformation_spec.rb +0 -112
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28e94ca52f9dd55df7e51dfed2433523c44e98d0
|
4
|
+
data.tar.gz: 904f122a44e0d63526d056cf027a4331348f32a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b289c83d07d9c9a33c95e8612907479bed4b693a83b2873279411d4635d675dc64f2067acab65f7321474f4382ff43fc27ff3c63e45401ed271d9dffa35150f2
|
7
|
+
data.tar.gz: c769832a6ee38567c1e4b1a0189947c1a9df0467b91850073b10a915883beb970fb2aab75b5a567ab333e5ead0e0e15737f66c7d85778b2afb01a4402dbc9ac2
|
data/Gemfile
CHANGED
@@ -4,6 +4,11 @@ source "https://rubygems.org"
|
|
4
4
|
gemspec
|
5
5
|
|
6
6
|
gem "pry", "~> 0.9.12.6"
|
7
|
-
gem "pry-byebug", "<= 1.3.2"
|
8
|
-
gem "pry-stack_explorer", "~> 0.4.9.1"
|
9
7
|
gem "pry-syntax-hacks", "~> 0.0.6"
|
8
|
+
|
9
|
+
if RUBY_ENGINE == "ruby"
|
10
|
+
gem "pry-byebug", "<= 1.3.2"
|
11
|
+
gem "pry-stack_explorer", "~> 0.4.9.1"
|
12
|
+
else
|
13
|
+
#gem "pry-nav", "~> 0.2.4"
|
14
|
+
end
|
data/Rakefile
CHANGED
@@ -3,47 +3,4 @@ require "rspec/core/rake_task"
|
|
3
3
|
|
4
4
|
RSpec::Core::RakeTask.new(:spec)
|
5
5
|
|
6
|
-
task :benchmark do
|
7
|
-
require "benchmark/ips"
|
8
|
-
require "pry"
|
9
|
-
require_relative "./benchmark/primo_search_response_transformation"
|
10
|
-
require_relative "./lib/transformator"
|
11
|
-
|
12
|
-
Benchmark.ips do |bm|
|
13
|
-
bm.report("Transformator::Benchmark::PrimoSearchResponseTransformation") do
|
14
|
-
Transformator::Benchmark::PrimoSearchResponseTransformation.new.apply(
|
15
|
-
to: File.read(File.expand_path(File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response_1.xml"))),
|
16
|
-
output: :hash
|
17
|
-
)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
=begin
|
22
|
-
#
|
23
|
-
# document_from_xml
|
24
|
-
#
|
25
|
-
xml_file_name = File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response.xml")
|
26
|
-
outer_document = Transformator.document_from_xml(File.read(xml_file_name))
|
27
|
-
|
28
|
-
Benchmark.ips do |bm|
|
29
|
-
bm.report("Transformator.document_from_xml") do
|
30
|
-
Transformator.document_from_xml(outer_document.locate("*/searchBriefReturn").first.text)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
#
|
35
|
-
# hash_from_document
|
36
|
-
#
|
37
|
-
xml_file_name = File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response.xml")
|
38
|
-
outer_document = Transformator.document_from_xml(File.read(xml_file_name))
|
39
|
-
inner_document = Transformator.document_from_xml(outer_document.locate("*/searchBriefReturn").first.text)
|
40
|
-
|
41
|
-
Benchmark.ips do |bm|
|
42
|
-
bm.report("Transformator.hash_from_document") do
|
43
|
-
Transformator.hash_from_document(inner_document)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
=end
|
47
|
-
end
|
48
|
-
|
49
6
|
task :default => :spec
|
data/bin/transformator
ADDED
data/lib/transformator.rb
CHANGED
@@ -1,33 +1,17 @@
|
|
1
|
-
require "ox"
|
2
1
|
require "transformator/version"
|
3
2
|
|
4
3
|
module Transformator
|
5
|
-
require_relative "./transformator/
|
6
|
-
require_relative "./transformator/
|
7
|
-
require_relative "./transformator/
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
elsif obj.is_a?(String) && obj[/\A\s*</]
|
19
|
-
:xml
|
20
|
-
elsif obj.nil?
|
21
|
-
nil
|
22
|
-
else
|
23
|
-
raise "Unkown format!"
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def self.oxify_path(path)
|
28
|
-
path
|
29
|
-
.gsub(/\A\/\/(\S+)/, "*/\\1")
|
30
|
-
.gsub(/(\S*)\/\/(\S*)/, "\\1/*/\\2")
|
31
|
-
.gsub(/\A\/(\w+)(\S*)/, "?/\\1\\2") # replace "/foo" with "?/foo"
|
32
|
-
end
|
4
|
+
require_relative "./transformator/cli"
|
5
|
+
require_relative "./transformator/dispatcher"
|
6
|
+
require_relative "./transformator/filesystem_pattern_evaluator"
|
7
|
+
#require_relative "./transformator/gzip_reader"
|
8
|
+
#require_relative "./transformator/gzip_writer"
|
9
|
+
require_relative "./transformator/oga_xml_dumper"
|
10
|
+
require_relative "./transformator/oga_xml_parser"
|
11
|
+
#require_relative "./transformator/ox_xml_dumper"
|
12
|
+
#require_relative "./transformator/ox_xml_parser"
|
13
|
+
require_relative "./transformator/parallel_processor"
|
14
|
+
require_relative "./transformator/null_processor"
|
15
|
+
require_relative "./transformator/tar_reader"
|
16
|
+
#require_relative "./transformator/tar_writer"
|
33
17
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "erb"
|
2
|
+
require "optparse"
|
3
|
+
require "transformator"
|
4
|
+
require "yaml"
|
5
|
+
|
6
|
+
require "pry" # TODO: remove
|
7
|
+
|
8
|
+
class Transformator::Cli
|
9
|
+
def initialize(argv = [])
|
10
|
+
if argv.empty?
|
11
|
+
puts options_parser.help
|
12
|
+
else
|
13
|
+
@options = parse_argv(argv) || {}
|
14
|
+
end
|
15
|
+
|
16
|
+
Transformator::Dispatcher.new(
|
17
|
+
YAML.load(
|
18
|
+
ERB.new(
|
19
|
+
File.read(
|
20
|
+
File.expand_path(@options[:config_file_name])
|
21
|
+
)
|
22
|
+
).result
|
23
|
+
)
|
24
|
+
).call
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_argv(argv)
|
28
|
+
presence({}.tap do |result|
|
29
|
+
OptionParser.new do |opts|
|
30
|
+
opts.banner = "Usage: transformator [options]"
|
31
|
+
|
32
|
+
opts.on( "-c", "--config-file FILE", "Configuration file in yaml format" ) do |config_file_name|
|
33
|
+
result[:config_file_name] = config_file_name
|
34
|
+
end
|
35
|
+
end.parse(argv)
|
36
|
+
end)
|
37
|
+
end
|
38
|
+
|
39
|
+
def presence(object)
|
40
|
+
(object.empty? rescue false) ? nil : object
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require "transformator"
|
2
|
+
|
3
|
+
class Transformator::Dispatcher
|
4
|
+
class State
|
5
|
+
attr_accessor :finished
|
6
|
+
attr_accessor :result
|
7
|
+
|
8
|
+
def initialize(initial_result)
|
9
|
+
@initial_result = initial_result
|
10
|
+
reset_result!
|
11
|
+
end
|
12
|
+
|
13
|
+
def finished?
|
14
|
+
@finished == true
|
15
|
+
end
|
16
|
+
|
17
|
+
def reset_result!
|
18
|
+
@result = @initial_result.dup
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def initialize(processors = [])
|
23
|
+
@processors = processors.map do |processor|
|
24
|
+
if processor["options"]
|
25
|
+
processor["class"].new(processor["options"])
|
26
|
+
else
|
27
|
+
processor["class"].new
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
@state = State.new([])
|
32
|
+
end
|
33
|
+
|
34
|
+
def call
|
35
|
+
until @state.finished?
|
36
|
+
@processors.each do |processor|
|
37
|
+
processor.call!(@state)
|
38
|
+
end
|
39
|
+
|
40
|
+
@state.reset_result!
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "transformator/processor"
|
2
|
+
|
3
|
+
class Transformator::FilesystemPatternEvaluator < Transformator::Processor
|
4
|
+
def initialize(options = {})
|
5
|
+
@patterns = options["patterns"]
|
6
|
+
end
|
7
|
+
|
8
|
+
def call!(state)
|
9
|
+
state.result = @patterns.map do |pattern|
|
10
|
+
Dir.glob(File.expand_path(pattern))
|
11
|
+
end.flatten
|
12
|
+
binding.pry
|
13
|
+
end
|
14
|
+
end
|
File without changes
|
File without changes
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require "transformator/processor"
|
2
|
+
require "thread/pool"
|
3
|
+
|
4
|
+
class Transformator::ParallelProcessor < Transformator::Processor
|
5
|
+
def initialize(options = {})
|
6
|
+
@processors = options["processors"].map do |processor|
|
7
|
+
if processor["options"]
|
8
|
+
processor["class"].new(processor["options"])
|
9
|
+
else
|
10
|
+
processor["class"].new
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
@number_of_workers = options["workers"]
|
15
|
+
@thread_pool = Thread.pool(@number_of_workers)
|
16
|
+
end
|
17
|
+
|
18
|
+
def call!(state)
|
19
|
+
if state.result.length > 0
|
20
|
+
chunk_size_per_worker = state.result.length.fdiv(@number_of_workers).ceil
|
21
|
+
results_enumerator = state.result.each_slice(chunk_size_per_worker)
|
22
|
+
|
23
|
+
chunks = Array.new(@number_of_workers).map do |_|
|
24
|
+
results_enumerator.next
|
25
|
+
end
|
26
|
+
|
27
|
+
@number_of_workers.times do |index|
|
28
|
+
@thread_pool.process do
|
29
|
+
@processors.each do |processor|
|
30
|
+
processor.call!(Struct.new(:result).new(chunks[index]))
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
@thread_pool.wait_done
|
36
|
+
state.result = chunks.inject(&:concat)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require "transformator/processor"
|
2
|
+
|
3
|
+
class Transformator::TarReader < Transformator::Processor
|
4
|
+
require_relative "./tar_reader/patched_rubygems_tar_reader"
|
5
|
+
|
6
|
+
attr_accessor :files
|
7
|
+
|
8
|
+
def initialize(options = {})
|
9
|
+
@bulk_size = options["bulk_size"]
|
10
|
+
@current_archive_enumerator = nil
|
11
|
+
|
12
|
+
if options["files"]
|
13
|
+
@files = ensure_array(options["files"]).map do |filename|
|
14
|
+
Dir.glob(File.expand_path(filename))
|
15
|
+
end.flatten
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def call!(state)
|
20
|
+
@files = state.result.slice!(0..-1) unless @files
|
21
|
+
|
22
|
+
if @current_archive_enumerator.nil?
|
23
|
+
if !@files.empty?
|
24
|
+
@current_archive_enumerator = PatchedRubygemsTarReader.new(
|
25
|
+
Zlib::GzipReader.open(
|
26
|
+
File.expand_path(@files.pop)
|
27
|
+
)
|
28
|
+
).each
|
29
|
+
else
|
30
|
+
state.finished = true
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
@bulk_size.times do
|
35
|
+
begin
|
36
|
+
if @current_archive_enumerator
|
37
|
+
state.result.push @current_archive_enumerator.next
|
38
|
+
end
|
39
|
+
rescue StopIteration
|
40
|
+
@current_archive_enumerator = nil
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
#
|
46
|
+
private
|
47
|
+
#
|
48
|
+
def ensure_array(object)
|
49
|
+
object.is_a?(Array) ? object : [object]
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (C) 2004 Mauricio Julio Fernández Pradier
|
3
|
+
# See LICENSE.txt for additional licensing information.
|
4
|
+
#++
|
5
|
+
require "rubygems/package"
|
6
|
+
|
7
|
+
class Transformator::TarReader::PatchedRubygemsTarReader < Gem::Package::TarReader
|
8
|
+
def each
|
9
|
+
return enum_for __method__ unless block_given?
|
10
|
+
|
11
|
+
until @io.eof? do
|
12
|
+
header = Gem::Package::TarHeader.from @io
|
13
|
+
return if header.empty?
|
14
|
+
|
15
|
+
entry = Gem::Package::TarReader::Entry.new header, @io
|
16
|
+
size = entry.header.size
|
17
|
+
|
18
|
+
yield entry.read
|
19
|
+
|
20
|
+
skip = (512 - (size % 512)) % 512
|
21
|
+
pending = size - entry.bytes_read
|
22
|
+
|
23
|
+
begin
|
24
|
+
# avoid reading...
|
25
|
+
@io.seek pending, IO::SEEK_CUR
|
26
|
+
pending = 0
|
27
|
+
rescue Errno::EINVAL, NameError
|
28
|
+
while pending > 0 do
|
29
|
+
bytes_read = @io.read([pending, 4096].min).size
|
30
|
+
raise UnexpectedEOF if @io.eof?
|
31
|
+
pending -= bytes_read
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
@io.read skip # discard trailing zeros
|
36
|
+
|
37
|
+
# make sure nobody can use #read, #getc or #rewind anymore
|
38
|
+
entry.close
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
File without changes
|