transformator 0.1.4 → 1.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +7 -2
- data/Rakefile +0 -43
- data/bin/transformator +4 -0
- data/lib/transformator.rb +13 -29
- data/lib/transformator/cli.rb +42 -0
- data/lib/transformator/dispatcher.rb +43 -0
- data/lib/transformator/filesystem_pattern_evaluator.rb +14 -0
- data/lib/transformator/gzip_reader.rb +0 -0
- data/lib/transformator/gzip_writer.rb +0 -0
- data/lib/transformator/null_processor.rb +9 -0
- data/lib/transformator/oga_xml_dumper.rb +10 -0
- data/lib/transformator/oga_xml_parser.rb +10 -0
- data/lib/transformator/ox_xml_dumper.rb +10 -0
- data/lib/transformator/ox_xml_parser.rb +10 -0
- data/lib/transformator/parallel_processor.rb +39 -0
- data/lib/transformator/processor.rb +4 -0
- data/lib/transformator/tar_reader.rb +51 -0
- data/lib/transformator/tar_reader/patched_rubygems_tar_reader.rb +41 -0
- data/lib/transformator/tar_writer.rb +0 -0
- data/lib/transformator/version.rb +1 -1
- data/spec/spec_helper.rb +0 -1
- data/spec/transformator_spec.rb +0 -8
- data/transformator.gemspec +7 -6
- data/ubpb.yml.erb +25 -0
- metadata +49 -67
- data/assets/primo_search_response.xml +0 -2878
- data/assets/primo_search_response_1.xml +0 -2467
- data/examples/primo_search_response_transformation.rb +0 -123
- data/examples/search_request_transformation.rb +0 -89
- data/lib/transformator/dsl.rb +0 -93
- data/lib/transformator/format_converter.rb +0 -27
- data/lib/transformator/format_converter/document_from_hash.rb +0 -13
- data/lib/transformator/format_converter/document_from_object.rb +0 -20
- data/lib/transformator/format_converter/document_from_xml.rb +0 -28
- data/lib/transformator/format_converter/hash_from_document.rb +0 -61
- data/lib/transformator/format_converter/xml_from_document.rb +0 -7
- data/lib/transformator/transformation.rb +0 -91
- data/spec/examples/primo_search_response_transformation_spec.rb +0 -19
- data/spec/examples/search_request_transformation_spec.rb +0 -48
- data/spec/transformator/dsl_spec.rb +0 -187
- data/spec/transformator/format_converter/hash_from_document_spec.rb +0 -42
- data/spec/transformator/transformation_spec.rb +0 -112
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28e94ca52f9dd55df7e51dfed2433523c44e98d0
|
4
|
+
data.tar.gz: 904f122a44e0d63526d056cf027a4331348f32a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b289c83d07d9c9a33c95e8612907479bed4b693a83b2873279411d4635d675dc64f2067acab65f7321474f4382ff43fc27ff3c63e45401ed271d9dffa35150f2
|
7
|
+
data.tar.gz: c769832a6ee38567c1e4b1a0189947c1a9df0467b91850073b10a915883beb970fb2aab75b5a567ab333e5ead0e0e15737f66c7d85778b2afb01a4402dbc9ac2
|
data/Gemfile
CHANGED
@@ -4,6 +4,11 @@ source "https://rubygems.org"
|
|
4
4
|
gemspec
|
5
5
|
|
6
6
|
gem "pry", "~> 0.9.12.6"
|
7
|
-
gem "pry-byebug", "<= 1.3.2"
|
8
|
-
gem "pry-stack_explorer", "~> 0.4.9.1"
|
9
7
|
gem "pry-syntax-hacks", "~> 0.0.6"
|
8
|
+
|
9
|
+
if RUBY_ENGINE == "ruby"
|
10
|
+
gem "pry-byebug", "<= 1.3.2"
|
11
|
+
gem "pry-stack_explorer", "~> 0.4.9.1"
|
12
|
+
else
|
13
|
+
#gem "pry-nav", "~> 0.2.4"
|
14
|
+
end
|
data/Rakefile
CHANGED
@@ -3,47 +3,4 @@ require "rspec/core/rake_task"
|
|
3
3
|
|
4
4
|
RSpec::Core::RakeTask.new(:spec)
|
5
5
|
|
6
|
-
task :benchmark do
|
7
|
-
require "benchmark/ips"
|
8
|
-
require "pry"
|
9
|
-
require_relative "./benchmark/primo_search_response_transformation"
|
10
|
-
require_relative "./lib/transformator"
|
11
|
-
|
12
|
-
Benchmark.ips do |bm|
|
13
|
-
bm.report("Transformator::Benchmark::PrimoSearchResponseTransformation") do
|
14
|
-
Transformator::Benchmark::PrimoSearchResponseTransformation.new.apply(
|
15
|
-
to: File.read(File.expand_path(File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response_1.xml"))),
|
16
|
-
output: :hash
|
17
|
-
)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
=begin
|
22
|
-
#
|
23
|
-
# document_from_xml
|
24
|
-
#
|
25
|
-
xml_file_name = File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response.xml")
|
26
|
-
outer_document = Transformator.document_from_xml(File.read(xml_file_name))
|
27
|
-
|
28
|
-
Benchmark.ips do |bm|
|
29
|
-
bm.report("Transformator.document_from_xml") do
|
30
|
-
Transformator.document_from_xml(outer_document.locate("*/searchBriefReturn").first.text)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
#
|
35
|
-
# hash_from_document
|
36
|
-
#
|
37
|
-
xml_file_name = File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response.xml")
|
38
|
-
outer_document = Transformator.document_from_xml(File.read(xml_file_name))
|
39
|
-
inner_document = Transformator.document_from_xml(outer_document.locate("*/searchBriefReturn").first.text)
|
40
|
-
|
41
|
-
Benchmark.ips do |bm|
|
42
|
-
bm.report("Transformator.hash_from_document") do
|
43
|
-
Transformator.hash_from_document(inner_document)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
=end
|
47
|
-
end
|
48
|
-
|
49
6
|
task :default => :spec
|
data/bin/transformator
ADDED
data/lib/transformator.rb
CHANGED
@@ -1,33 +1,17 @@
|
|
1
|
-
require "ox"
|
2
1
|
require "transformator/version"
|
3
2
|
|
4
3
|
module Transformator
|
5
|
-
require_relative "./transformator/
|
6
|
-
require_relative "./transformator/
|
7
|
-
require_relative "./transformator/
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
elsif obj.is_a?(String) && obj[/\A\s*</]
|
19
|
-
:xml
|
20
|
-
elsif obj.nil?
|
21
|
-
nil
|
22
|
-
else
|
23
|
-
raise "Unkown format!"
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def self.oxify_path(path)
|
28
|
-
path
|
29
|
-
.gsub(/\A\/\/(\S+)/, "*/\\1")
|
30
|
-
.gsub(/(\S*)\/\/(\S*)/, "\\1/*/\\2")
|
31
|
-
.gsub(/\A\/(\w+)(\S*)/, "?/\\1\\2") # replace "/foo" with "?/foo"
|
32
|
-
end
|
4
|
+
require_relative "./transformator/cli"
|
5
|
+
require_relative "./transformator/dispatcher"
|
6
|
+
require_relative "./transformator/filesystem_pattern_evaluator"
|
7
|
+
#require_relative "./transformator/gzip_reader"
|
8
|
+
#require_relative "./transformator/gzip_writer"
|
9
|
+
require_relative "./transformator/oga_xml_dumper"
|
10
|
+
require_relative "./transformator/oga_xml_parser"
|
11
|
+
#require_relative "./transformator/ox_xml_dumper"
|
12
|
+
#require_relative "./transformator/ox_xml_parser"
|
13
|
+
require_relative "./transformator/parallel_processor"
|
14
|
+
require_relative "./transformator/null_processor"
|
15
|
+
require_relative "./transformator/tar_reader"
|
16
|
+
#require_relative "./transformator/tar_writer"
|
33
17
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "erb"
|
2
|
+
require "optparse"
|
3
|
+
require "transformator"
|
4
|
+
require "yaml"
|
5
|
+
|
6
|
+
require "pry" # TODO: remove
|
7
|
+
|
8
|
+
class Transformator::Cli
|
9
|
+
def initialize(argv = [])
|
10
|
+
if argv.empty?
|
11
|
+
puts options_parser.help
|
12
|
+
else
|
13
|
+
@options = parse_argv(argv) || {}
|
14
|
+
end
|
15
|
+
|
16
|
+
Transformator::Dispatcher.new(
|
17
|
+
YAML.load(
|
18
|
+
ERB.new(
|
19
|
+
File.read(
|
20
|
+
File.expand_path(@options[:config_file_name])
|
21
|
+
)
|
22
|
+
).result
|
23
|
+
)
|
24
|
+
).call
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_argv(argv)
|
28
|
+
presence({}.tap do |result|
|
29
|
+
OptionParser.new do |opts|
|
30
|
+
opts.banner = "Usage: transformator [options]"
|
31
|
+
|
32
|
+
opts.on( "-c", "--config-file FILE", "Configuration file in yaml format" ) do |config_file_name|
|
33
|
+
result[:config_file_name] = config_file_name
|
34
|
+
end
|
35
|
+
end.parse(argv)
|
36
|
+
end)
|
37
|
+
end
|
38
|
+
|
39
|
+
def presence(object)
|
40
|
+
(object.empty? rescue false) ? nil : object
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require "transformator"
|
2
|
+
|
3
|
+
class Transformator::Dispatcher
|
4
|
+
class State
|
5
|
+
attr_accessor :finished
|
6
|
+
attr_accessor :result
|
7
|
+
|
8
|
+
def initialize(initial_result)
|
9
|
+
@initial_result = initial_result
|
10
|
+
reset_result!
|
11
|
+
end
|
12
|
+
|
13
|
+
def finished?
|
14
|
+
@finished == true
|
15
|
+
end
|
16
|
+
|
17
|
+
def reset_result!
|
18
|
+
@result = @initial_result.dup
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def initialize(processors = [])
|
23
|
+
@processors = processors.map do |processor|
|
24
|
+
if processor["options"]
|
25
|
+
processor["class"].new(processor["options"])
|
26
|
+
else
|
27
|
+
processor["class"].new
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
@state = State.new([])
|
32
|
+
end
|
33
|
+
|
34
|
+
def call
|
35
|
+
until @state.finished?
|
36
|
+
@processors.each do |processor|
|
37
|
+
processor.call!(@state)
|
38
|
+
end
|
39
|
+
|
40
|
+
@state.reset_result!
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "transformator/processor"
|
2
|
+
|
3
|
+
class Transformator::FilesystemPatternEvaluator < Transformator::Processor
|
4
|
+
def initialize(options = {})
|
5
|
+
@patterns = options["patterns"]
|
6
|
+
end
|
7
|
+
|
8
|
+
def call!(state)
|
9
|
+
state.result = @patterns.map do |pattern|
|
10
|
+
Dir.glob(File.expand_path(pattern))
|
11
|
+
end.flatten
|
12
|
+
binding.pry
|
13
|
+
end
|
14
|
+
end
|
File without changes
|
File without changes
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require "transformator/processor"
|
2
|
+
require "thread/pool"
|
3
|
+
|
4
|
+
class Transformator::ParallelProcessor < Transformator::Processor
|
5
|
+
def initialize(options = {})
|
6
|
+
@processors = options["processors"].map do |processor|
|
7
|
+
if processor["options"]
|
8
|
+
processor["class"].new(processor["options"])
|
9
|
+
else
|
10
|
+
processor["class"].new
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
@number_of_workers = options["workers"]
|
15
|
+
@thread_pool = Thread.pool(@number_of_workers)
|
16
|
+
end
|
17
|
+
|
18
|
+
def call!(state)
|
19
|
+
if state.result.length > 0
|
20
|
+
chunk_size_per_worker = state.result.length.fdiv(@number_of_workers).ceil
|
21
|
+
results_enumerator = state.result.each_slice(chunk_size_per_worker)
|
22
|
+
|
23
|
+
chunks = Array.new(@number_of_workers).map do |_|
|
24
|
+
results_enumerator.next
|
25
|
+
end
|
26
|
+
|
27
|
+
@number_of_workers.times do |index|
|
28
|
+
@thread_pool.process do
|
29
|
+
@processors.each do |processor|
|
30
|
+
processor.call!(Struct.new(:result).new(chunks[index]))
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
@thread_pool.wait_done
|
36
|
+
state.result = chunks.inject(&:concat)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require "transformator/processor"
|
2
|
+
|
3
|
+
class Transformator::TarReader < Transformator::Processor
|
4
|
+
require_relative "./tar_reader/patched_rubygems_tar_reader"
|
5
|
+
|
6
|
+
attr_accessor :files
|
7
|
+
|
8
|
+
def initialize(options = {})
|
9
|
+
@bulk_size = options["bulk_size"]
|
10
|
+
@current_archive_enumerator = nil
|
11
|
+
|
12
|
+
if options["files"]
|
13
|
+
@files = ensure_array(options["files"]).map do |filename|
|
14
|
+
Dir.glob(File.expand_path(filename))
|
15
|
+
end.flatten
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def call!(state)
|
20
|
+
@files = state.result.slice!(0..-1) unless @files
|
21
|
+
|
22
|
+
if @current_archive_enumerator.nil?
|
23
|
+
if !@files.empty?
|
24
|
+
@current_archive_enumerator = PatchedRubygemsTarReader.new(
|
25
|
+
Zlib::GzipReader.open(
|
26
|
+
File.expand_path(@files.pop)
|
27
|
+
)
|
28
|
+
).each
|
29
|
+
else
|
30
|
+
state.finished = true
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
@bulk_size.times do
|
35
|
+
begin
|
36
|
+
if @current_archive_enumerator
|
37
|
+
state.result.push @current_archive_enumerator.next
|
38
|
+
end
|
39
|
+
rescue StopIteration
|
40
|
+
@current_archive_enumerator = nil
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
#
|
46
|
+
private
|
47
|
+
#
|
48
|
+
def ensure_array(object)
|
49
|
+
object.is_a?(Array) ? object : [object]
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (C) 2004 Mauricio Julio Fernández Pradier
|
3
|
+
# See LICENSE.txt for additional licensing information.
|
4
|
+
#++
|
5
|
+
require "rubygems/package"
|
6
|
+
|
7
|
+
class Transformator::TarReader::PatchedRubygemsTarReader < Gem::Package::TarReader
|
8
|
+
def each
|
9
|
+
return enum_for __method__ unless block_given?
|
10
|
+
|
11
|
+
until @io.eof? do
|
12
|
+
header = Gem::Package::TarHeader.from @io
|
13
|
+
return if header.empty?
|
14
|
+
|
15
|
+
entry = Gem::Package::TarReader::Entry.new header, @io
|
16
|
+
size = entry.header.size
|
17
|
+
|
18
|
+
yield entry.read
|
19
|
+
|
20
|
+
skip = (512 - (size % 512)) % 512
|
21
|
+
pending = size - entry.bytes_read
|
22
|
+
|
23
|
+
begin
|
24
|
+
# avoid reading...
|
25
|
+
@io.seek pending, IO::SEEK_CUR
|
26
|
+
pending = 0
|
27
|
+
rescue Errno::EINVAL, NameError
|
28
|
+
while pending > 0 do
|
29
|
+
bytes_read = @io.read([pending, 4096].min).size
|
30
|
+
raise UnexpectedEOF if @io.eof?
|
31
|
+
pending -= bytes_read
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
@io.read skip # discard trailing zeros
|
36
|
+
|
37
|
+
# make sure nobody can use #read, #getc or #rewind anymore
|
38
|
+
entry.close
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
File without changes
|