transformator 0.1.4 → 1.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -2
  3. data/Rakefile +0 -43
  4. data/bin/transformator +4 -0
  5. data/lib/transformator.rb +13 -29
  6. data/lib/transformator/cli.rb +42 -0
  7. data/lib/transformator/dispatcher.rb +43 -0
  8. data/lib/transformator/filesystem_pattern_evaluator.rb +14 -0
  9. data/lib/transformator/gzip_reader.rb +0 -0
  10. data/lib/transformator/gzip_writer.rb +0 -0
  11. data/lib/transformator/null_processor.rb +9 -0
  12. data/lib/transformator/oga_xml_dumper.rb +10 -0
  13. data/lib/transformator/oga_xml_parser.rb +10 -0
  14. data/lib/transformator/ox_xml_dumper.rb +10 -0
  15. data/lib/transformator/ox_xml_parser.rb +10 -0
  16. data/lib/transformator/parallel_processor.rb +39 -0
  17. data/lib/transformator/processor.rb +4 -0
  18. data/lib/transformator/tar_reader.rb +51 -0
  19. data/lib/transformator/tar_reader/patched_rubygems_tar_reader.rb +41 -0
  20. data/lib/transformator/tar_writer.rb +0 -0
  21. data/lib/transformator/version.rb +1 -1
  22. data/spec/spec_helper.rb +0 -1
  23. data/spec/transformator_spec.rb +0 -8
  24. data/transformator.gemspec +7 -6
  25. data/ubpb.yml.erb +25 -0
  26. metadata +49 -67
  27. data/assets/primo_search_response.xml +0 -2878
  28. data/assets/primo_search_response_1.xml +0 -2467
  29. data/examples/primo_search_response_transformation.rb +0 -123
  30. data/examples/search_request_transformation.rb +0 -89
  31. data/lib/transformator/dsl.rb +0 -93
  32. data/lib/transformator/format_converter.rb +0 -27
  33. data/lib/transformator/format_converter/document_from_hash.rb +0 -13
  34. data/lib/transformator/format_converter/document_from_object.rb +0 -20
  35. data/lib/transformator/format_converter/document_from_xml.rb +0 -28
  36. data/lib/transformator/format_converter/hash_from_document.rb +0 -61
  37. data/lib/transformator/format_converter/xml_from_document.rb +0 -7
  38. data/lib/transformator/transformation.rb +0 -91
  39. data/spec/examples/primo_search_response_transformation_spec.rb +0 -19
  40. data/spec/examples/search_request_transformation_spec.rb +0 -48
  41. data/spec/transformator/dsl_spec.rb +0 -187
  42. data/spec/transformator/format_converter/hash_from_document_spec.rb +0 -42
  43. data/spec/transformator/transformation_spec.rb +0 -112
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: de79125e94963cf8f64a509a0ad23c1f33b6404e
4
- data.tar.gz: bd978c37ba87a8a64c31240d48096f0a477d5b4a
3
+ metadata.gz: 28e94ca52f9dd55df7e51dfed2433523c44e98d0
4
+ data.tar.gz: 904f122a44e0d63526d056cf027a4331348f32a3
5
5
  SHA512:
6
- metadata.gz: 05aa679773f11e290dab9b3de31499dfd452940b6670c0e8894689e1605f9eb04b5e2a264440a1679d6935c05006b9d782d379b23ecfb212e41db643f0385dba
7
- data.tar.gz: c352e245758bbc162a87ac6c8d7b9fded4bb9f060972b7379303c23eb0696303810eb64f05488706af58253b26f37b583a8b3f13731d15c54e262152bd0bd6d4
6
+ metadata.gz: b289c83d07d9c9a33c95e8612907479bed4b693a83b2873279411d4635d675dc64f2067acab65f7321474f4382ff43fc27ff3c63e45401ed271d9dffa35150f2
7
+ data.tar.gz: c769832a6ee38567c1e4b1a0189947c1a9df0467b91850073b10a915883beb970fb2aab75b5a567ab333e5ead0e0e15737f66c7d85778b2afb01a4402dbc9ac2
data/Gemfile CHANGED
@@ -4,6 +4,11 @@ source "https://rubygems.org"
4
4
  gemspec
5
5
 
6
6
  gem "pry", "~> 0.9.12.6"
7
- gem "pry-byebug", "<= 1.3.2"
8
- gem "pry-stack_explorer", "~> 0.4.9.1"
9
7
  gem "pry-syntax-hacks", "~> 0.0.6"
8
+
9
+ if RUBY_ENGINE == "ruby"
10
+ gem "pry-byebug", "<= 1.3.2"
11
+ gem "pry-stack_explorer", "~> 0.4.9.1"
12
+ else
13
+ #gem "pry-nav", "~> 0.2.4"
14
+ end
data/Rakefile CHANGED
@@ -3,47 +3,4 @@ require "rspec/core/rake_task"
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :benchmark do
7
- require "benchmark/ips"
8
- require "pry"
9
- require_relative "./benchmark/primo_search_response_transformation"
10
- require_relative "./lib/transformator"
11
-
12
- Benchmark.ips do |bm|
13
- bm.report("Transformator::Benchmark::PrimoSearchResponseTransformation") do
14
- Transformator::Benchmark::PrimoSearchResponseTransformation.new.apply(
15
- to: File.read(File.expand_path(File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response_1.xml"))),
16
- output: :hash
17
- )
18
- end
19
- end
20
-
21
- =begin
22
- #
23
- # document_from_xml
24
- #
25
- xml_file_name = File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response.xml")
26
- outer_document = Transformator.document_from_xml(File.read(xml_file_name))
27
-
28
- Benchmark.ips do |bm|
29
- bm.report("Transformator.document_from_xml") do
30
- Transformator.document_from_xml(outer_document.locate("*/searchBriefReturn").first.text)
31
- end
32
- end
33
-
34
- #
35
- # hash_from_document
36
- #
37
- xml_file_name = File.join(File.dirname(__FILE__), "benchmark/primo_search_response_transformation/primo_response.xml")
38
- outer_document = Transformator.document_from_xml(File.read(xml_file_name))
39
- inner_document = Transformator.document_from_xml(outer_document.locate("*/searchBriefReturn").first.text)
40
-
41
- Benchmark.ips do |bm|
42
- bm.report("Transformator.hash_from_document") do
43
- Transformator.hash_from_document(inner_document)
44
- end
45
- end
46
- =end
47
- end
48
-
49
6
  task :default => :spec
data/bin/transformator ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require "pry"
3
+ require "transformator/cli"
4
+ Transformator::Cli.new(ARGV)
data/lib/transformator.rb CHANGED
@@ -1,33 +1,17 @@
1
- require "ox"
2
1
  require "transformator/version"
3
2
 
4
3
  module Transformator
5
- require_relative "./transformator/dsl"
6
- require_relative "./transformator/format_converter"
7
- require_relative "./transformator/transformation"
8
-
9
- extend Transformator::FormatConverter
10
-
11
- def self.determine_format(obj)
12
- if obj.is_a?(Hash)
13
- :hash
14
- elsif obj.is_a?(Ox::Document)
15
- :ox_document
16
- elsif obj.is_a?(String) && obj[/\A\s*{/]
17
- :json
18
- elsif obj.is_a?(String) && obj[/\A\s*</]
19
- :xml
20
- elsif obj.nil?
21
- nil
22
- else
23
- raise "Unkown format!"
24
- end
25
- end
26
-
27
- def self.oxify_path(path)
28
- path
29
- .gsub(/\A\/\/(\S+)/, "*/\\1")
30
- .gsub(/(\S*)\/\/(\S*)/, "\\1/*/\\2")
31
- .gsub(/\A\/(\w+)(\S*)/, "?/\\1\\2") # replace "/foo" with "?/foo"
32
- end
4
+ require_relative "./transformator/cli"
5
+ require_relative "./transformator/dispatcher"
6
+ require_relative "./transformator/filesystem_pattern_evaluator"
7
+ #require_relative "./transformator/gzip_reader"
8
+ #require_relative "./transformator/gzip_writer"
9
+ require_relative "./transformator/oga_xml_dumper"
10
+ require_relative "./transformator/oga_xml_parser"
11
+ #require_relative "./transformator/ox_xml_dumper"
12
+ #require_relative "./transformator/ox_xml_parser"
13
+ require_relative "./transformator/parallel_processor"
14
+ require_relative "./transformator/null_processor"
15
+ require_relative "./transformator/tar_reader"
16
+ #require_relative "./transformator/tar_writer"
33
17
  end
@@ -0,0 +1,42 @@
1
+ require "erb"
2
+ require "optparse"
3
+ require "transformator"
4
+ require "yaml"
5
+
6
+ require "pry" # TODO: remove
7
+
8
# Command line front end: reads an ERB-preprocessed YAML configuration file
# and passes the resulting processor list to Transformator::Dispatcher.
class Transformator::Cli
  # Parses the command line and runs the dispatcher.
  #
  # When no configuration file was given (empty argv or missing -c option)
  # the usage text is printed and nothing is dispatched.
  #
  # @param argv [Array<String>] raw command line arguments
  def initialize(argv = [])
    @options = argv.empty? ? {} : (parse_argv(argv) || {})
    config_file_name = @options[:config_file_name]

    unless config_file_name
      puts options_parser.help
      return
    end

    # NOTE(review): YAML.load can instantiate arbitrary objects. The dispatcher
    # calls `.new` on each entry's "class" value, so the config presumably
    # relies on Ruby-specific YAML tags -- only load trusted configuration.
    configuration = YAML.load(
      ERB.new(File.read(File.expand_path(config_file_name))).result
    )

    Transformator::Dispatcher.new(configuration).call
  end

  # Extracts the known options from argv.
  #
  # @param argv [Array<String>] raw command line arguments
  # @return [Hash, nil] parsed options keyed by symbol, nil if none were given
  def parse_argv(argv)
    presence({}.tap { |result| options_parser(result).parse(argv) })
  end

  # @param object [Object] any object
  # @return [Object, nil] nil if the object reports itself as empty, else the object
  def presence(object)
    object.respond_to?(:empty?) && object.empty? ? nil : object
  end

  private

  # Builds the option parser; parsed values are written into +result+.
  #
  # @param result [Hash] target for parsed option values
  # @return [OptionParser]
  def options_parser(result = {})
    OptionParser.new do |opts|
      opts.banner = "Usage: transformator [options]"

      opts.on("-c", "--config-file FILE", "Configuration file in yaml format") do |config_file_name|
        result[:config_file_name] = config_file_name
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require "transformator"
2
+
3
# Instantiates the configured processors and runs them in order, round after
# round, until one of them marks the shared state as finished.
class Transformator::Dispatcher
  # Mutable state handed to every processor's #call!.
  class State
    attr_accessor :finished
    attr_accessor :result

    # @param initial_result [Object] value a copy of which #result is reset to
    def initialize(initial_result)
      @initial_result = initial_result
      reset_result!
    end

    # @return [Boolean] true only if finished was explicitly set to true
    def finished?
      @finished == true
    end

    # Replaces #result with a fresh shallow copy of the initial result.
    def reset_result!
      @result = @initial_result.dup
    end
  end

  # @param processors [Array<Hash>, nil] one hash per processor with a "class"
  #   entry (must respond to .new) and an optional "options" entry that is
  #   passed to .new; nil/false (e.g. an empty configuration) means none
  def initialize(processors = [])
    @processors = (processors || []).map do |processor|
      processor_class = processor["class"]
      processor_options = processor["options"]

      processor_options ? processor_class.new(processor_options) : processor_class.new
    end

    @state = State.new([])
  end

  # Runs all processors repeatedly, resetting the result after each round,
  # until the state is finished.
  #
  # @return [nil]
  def call
    # Without processors nothing could ever finish the state; do not spin forever.
    return if @processors.empty?

    until @state.finished?
      @processors.each { |processor| processor.call!(@state) }
      @state.reset_result!
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require "transformator/processor"
2
+
3
# Processor that replaces state.result with the paths matching the configured
# glob patterns.
class Transformator::FilesystemPatternEvaluator < Transformator::Processor
  # @param options [Hash] "patterns" => a glob pattern or an array of them
  def initialize(options = {})
    @patterns = options["patterns"]
  end

  # Expands every pattern and stores all matching paths as one flat array.
  #
  # @param state [#result=] dispatcher state
  # @return [Array<String>] the matched paths
  def call!(state)
    # Array() tolerates a missing or single (non-array) patterns option.
    state.result = Array(@patterns).flat_map do |pattern|
      Dir.glob(File.expand_path(pattern))
    end
  end
end
File without changes
File without changes
@@ -0,0 +1,9 @@
1
+ require "transformator/processor"
2
+
3
# Processor that leaves the state untouched and only reports that it ran.
class Transformator::NullProcessor < Transformator::Processor
  # Prints "null" to standard output.
  #
  # @param state [Object] dispatcher state (not read or modified)
  # @return [nil]
  def call!(state)
    # Deliberately does not set state.finished; that assignment was left
    # disabled in the original implementation.
    $stdout.puts("null")
  end
end
@@ -0,0 +1,10 @@
1
+ require "oga"
2
+ require "transformator/processor"
3
+
4
# Processor that serializes every element of state.result by calling #to_xml.
class Transformator::OgaXmlDumper < Transformator::Processor
  # Replaces each element of state.result in place with its #to_xml value.
  #
  # @param state [#result] dispatcher state whose result is an Array
  # @return [Array] the mutated result array
  def call!(state)
    state.result.map!(&:to_xml)
  end
end
@@ -0,0 +1,10 @@
1
+ require "oga"
2
+ require "transformator/processor"
3
+
4
# Processor that parses every element of state.result with Oga.parse_xml.
class Transformator::OgaXmlParser < Transformator::Processor
  # Replaces each element of state.result in place with its parsed form.
  #
  # @param state [#result] dispatcher state whose result is an Array
  # @return [Array] the mutated result array
  def call!(state)
    state.result.map! { |source| Oga.parse_xml(source) }
  end
end
@@ -0,0 +1,10 @@
1
+ require "ox"
2
+ require "transformator/processor"
3
+
4
# Processor that serializes every element of state.result with Ox.dump.
class Transformator::OxXmlDumper < Transformator::Processor
  # Replaces each element of state.result in place with its dumped form.
  #
  # @param state [#result] dispatcher state whose result is an Array
  # @return [Array] the mutated result array
  def call!(state)
    state.result.map! { |document| Ox.dump(document) }
  end
end
@@ -0,0 +1,10 @@
1
+ require "ox"
2
+ require "transformator/processor"
3
+
4
# Processor that parses every element of state.result with Ox.parse.
class Transformator::OxXmlParser < Transformator::Processor
  # Replaces each element of state.result in place with its parsed form.
  #
  # @param state [#result] dispatcher state whose result is an Array
  # @return [Array] the mutated result array
  def call!(state)
    state.result.map! { |source| Ox.parse(source) }
  end
end
@@ -0,0 +1,39 @@
1
+ require "transformator/processor"
2
+ require "thread/pool"
3
+
4
# Processor that splits state.result into chunks and runs a nested list of
# processors over each chunk on a thread pool.
class Transformator::ParallelProcessor < Transformator::Processor
  # Minimal per-chunk stand-in for the dispatcher state; defined once instead
  # of building a new Struct class for every task.
  ChunkState = Struct.new(:result)

  # @param options [Hash] "processors" => array of hashes with a "class" entry
  #   (must respond to .new) and an optional "options" entry passed to .new;
  #   "workers" => number of pool threads
  def initialize(options = {})
    @processors = options["processors"].map do |processor|
      processor_class = processor["class"]
      processor_options = processor["options"]

      processor_options ? processor_class.new(processor_options) : processor_class.new
    end

    @number_of_workers = options["workers"]
    @thread_pool = Thread.pool(@number_of_workers)
  end

  # Processes state.result chunk-wise and stores the concatenated chunk
  # results back into state.result. Does nothing for an empty result.
  #
  # @param state [#result, #result=] dispatcher state whose result is an Array
  # @return [Array, nil] the new result, or nil if there was nothing to do
  def call!(state)
    return if state.result.empty?

    chunk_size_per_worker = state.result.length.fdiv(@number_of_workers).ceil

    # each_slice may yield fewer slices than there are workers (e.g. 5 items
    # and 4 workers give 3 slices), so iterate over the slices that exist
    # instead of pulling a fixed number from an enumerator.
    chunk_states = state.result.each_slice(chunk_size_per_worker).map do |chunk|
      ChunkState.new(chunk)
    end

    chunk_states.each do |chunk_state|
      @thread_pool.process do
        @processors.each { |processor| processor.call!(chunk_state) }
      end
    end

    @thread_pool.wait_done

    # Read the results back from the chunk states so that processors which
    # assign a new result (rather than mutating it in place) are honoured.
    state.result = chunk_states.flat_map(&:result)
  end
end
@@ -0,0 +1,4 @@
1
+ require "transformator"
2
+
3
# Common superclass of the pipeline processors in this gem. It defines no
# behaviour itself; the subclasses visible in this changeset each implement
# call!(state).
+ class Transformator::Processor
4
+ end
@@ -0,0 +1,51 @@
1
+ require "transformator/processor"
2
+
3
# Processor that streams the entries of gzip-compressed tar archives into
# state.result, pushing at most bulk_size entries per call.
class Transformator::TarReader < Transformator::Processor
  require_relative "./tar_reader/patched_rubygems_tar_reader"

  attr_accessor :files

  # @param options [Hash] "bulk_size" => maximum number of entries pushed per
  #   call; "files" => optional glob pattern or array of patterns naming the
  #   archives (if absent, the archive list is taken from state.result on the
  #   first call)
  def initialize(options = {})
    @bulk_size = options["bulk_size"]
    @current_archive_enumerator = nil
    @current_archive_io = nil

    if options["files"]
      @files = ensure_array(options["files"]).flat_map do |filename|
        Dir.glob(File.expand_path(filename))
      end
    end
  end

  # Pushes up to bulk_size archive entries onto state.result. Opens the next
  # archive when none is in progress and marks the state as finished once no
  # archives are left.
  #
  # @param state [#result, #finished=] dispatcher state
  def call!(state)
    # Without configured files, consume the incoming result as the file list.
    @files = state.result.slice!(0..-1) unless @files

    if @current_archive_enumerator.nil?
      if @files.empty?
        state.finished = true
      else
        open_next_archive
      end
    end

    @bulk_size.times do
      begin
        if @current_archive_enumerator
          state.result.push @current_archive_enumerator.next
        end
      rescue StopIteration
        # Archive exhausted: release its file handle; the next call opens
        # the following archive.
        close_current_archive
      end
    end
  end

  private

  # Opens the last file of the list and prepares an enumerator over its entries.
  def open_next_archive
    @current_archive_io = Zlib::GzipReader.open(File.expand_path(@files.pop))
    @current_archive_enumerator = PatchedRubygemsTarReader.new(@current_archive_io).each
  end

  # Closes the gzip stream of the current archive (previously leaked) and
  # forgets the enumerator.
  def close_current_archive
    @current_archive_io.close if @current_archive_io && !@current_archive_io.closed?
    @current_archive_io = nil
    @current_archive_enumerator = nil
  end

  # @return [Array] the object itself if it is an Array, else wrapped in one
  def ensure_array(object)
    object.is_a?(Array) ? object : [object]
  end
end
@@ -0,0 +1,41 @@
1
+ #--
2
+ # Copyright (C) 2004 Mauricio Julio Fernández Pradier
3
+ # See LICENSE.txt for additional licensing information.
4
+ #++
5
+ require "rubygems/package"
6
+
7
# Subclass of Gem::Package::TarReader with its own #each, which yields the
# value of entry.read for every archive entry.
# NOTE(review): the class name suggests this is a patched copy of the
# RubyGems implementation -- confirm against the RubyGems source before
# changing the bookkeeping below.
+ class Transformator::TarReader::PatchedRubygemsTarReader < Gem::Package::TarReader
8
# Iterates over the archive in @io. Without a block an enumerator for this
# method is returned. Iteration ends at EOF or at the first empty header.
+ def each
9
+ return enum_for __method__ unless block_given?
10
+
11
+ until @io.eof? do
12
+ header = Gem::Package::TarHeader.from @io
13
+ return if header.empty?
14
+
15
+ entry = Gem::Package::TarReader::Entry.new header, @io
16
+ size = entry.header.size
17
+
18
# Yield what entry.read returns, not the entry object.
+ yield entry.read
19
+
20
# Number of bytes from the end of the entry data up to the next multiple of 512.
+ skip = (512 - (size % 512)) % 512
21
# Bytes of the entry that have not been consumed via the entry yet.
+ pending = size - entry.bytes_read
22
+
23
# Try to seek past the unread remainder; if that raises Errno::EINVAL or
# NameError, fall back to reading it in pieces of at most 4096 bytes.
+ begin
24
+ # avoid reading...
25
+ @io.seek pending, IO::SEEK_CUR
26
+ pending = 0
27
+ rescue Errno::EINVAL, NameError
28
+ while pending > 0 do
29
+ bytes_read = @io.read([pending, 4096].min).size
30
+ raise UnexpectedEOF if @io.eof?
31
+ pending -= bytes_read
32
+ end
33
+ end
34
+
35
+ @io.read skip # discard trailing zeros
36
+
37
+ # make sure nobody can use #read, #getc or #rewind anymore
38
+ entry.close
39
+ end
40
+ end
41
+ end
File without changes
@@ -1,3 +1,3 @@
1
1
  module Transformator
2
- VERSION = "0.1.4"
2
+ VERSION = "1.0.0.pre1"
3
3
  end