transformator 0.1.4 → 1.0.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -2
  3. data/Rakefile +0 -43
  4. data/bin/transformator +4 -0
  5. data/lib/transformator.rb +13 -29
  6. data/lib/transformator/cli.rb +42 -0
  7. data/lib/transformator/dispatcher.rb +43 -0
  8. data/lib/transformator/filesystem_pattern_evaluator.rb +14 -0
  9. data/lib/transformator/gzip_reader.rb +0 -0
  10. data/lib/transformator/gzip_writer.rb +0 -0
  11. data/lib/transformator/null_processor.rb +9 -0
  12. data/lib/transformator/oga_xml_dumper.rb +10 -0
  13. data/lib/transformator/oga_xml_parser.rb +10 -0
  14. data/lib/transformator/ox_xml_dumper.rb +10 -0
  15. data/lib/transformator/ox_xml_parser.rb +10 -0
  16. data/lib/transformator/parallel_processor.rb +39 -0
  17. data/lib/transformator/processor.rb +4 -0
  18. data/lib/transformator/tar_reader.rb +51 -0
  19. data/lib/transformator/tar_reader/patched_rubygems_tar_reader.rb +41 -0
  20. data/lib/transformator/tar_writer.rb +0 -0
  21. data/lib/transformator/version.rb +1 -1
  22. data/spec/spec_helper.rb +0 -1
  23. data/spec/transformator_spec.rb +0 -8
  24. data/transformator.gemspec +7 -6
  25. data/ubpb.yml.erb +25 -0
  26. metadata +49 -67
  27. data/assets/primo_search_response.xml +0 -2878
  28. data/assets/primo_search_response_1.xml +0 -2467
  29. data/examples/primo_search_response_transformation.rb +0 -123
  30. data/examples/search_request_transformation.rb +0 -89
  31. data/lib/transformator/dsl.rb +0 -93
  32. data/lib/transformator/format_converter.rb +0 -27
  33. data/lib/transformator/format_converter/document_from_hash.rb +0 -13
  34. data/lib/transformator/format_converter/document_from_object.rb +0 -20
  35. data/lib/transformator/format_converter/document_from_xml.rb +0 -28
  36. data/lib/transformator/format_converter/hash_from_document.rb +0 -61
  37. data/lib/transformator/format_converter/xml_from_document.rb +0 -7
  38. data/lib/transformator/transformation.rb +0 -91
  39. data/spec/examples/primo_search_response_transformation_spec.rb +0 -19
  40. data/spec/examples/search_request_transformation_spec.rb +0 -48
  41. data/spec/transformator/dsl_spec.rb +0 -187
  42. data/spec/transformator/format_converter/hash_from_document_spec.rb +0 -42
  43. data/spec/transformator/transformation_spec.rb +0 -112
@@ -1,123 +0,0 @@
1
- require "transformator"
2
-
3
module Transformator::Examples
  # Example transformation: turns a Primo SOAP "searchBrief" response into a
  # result document containing `took`, `hits/hits` and `facets`.
  class PrimoSearchResponseTransformation
    # Forwards all arguments unchanged to the transformation built by
    # .transformation and returns its result.
    def self.apply(*args)
      transformation.apply(*args)
    end

    # Builds the outer transformation. It extracts the XML string embedded in
    # `Envelope/Body/searchBriefResponse/searchBriefReturn`, parses it, and
    # runs the inner (memoized) transformation on the parsed document.
    def self.transformation
      Transformator::Transformation.new do
        # Inner transformation that maps the parsed search brief return.
        def search_brief_return_transformation
          @search_brief_return_transformation ||= Transformator::Transformation.new do
            #
            # setup target skeleton
            #
            process :document do |source, target|
              target_skeleton = {
                took: nil,
                hits: {
                  hits: []
                },
                facets: {}
              }

              elements_from_hash(target_skeleton).each do |element|
                target << element
              end
            end

            #
            # facets
            #
            process "SEGMENTS/JAGROOT/RESULT/FACETLIST/FACET" do |source_facet, target|
              # syntactic mapping
              find(target, "facets") << element(source_facet["NAME"]) do |target_facet|
                source_facet_values = find_all(source_facet, "FACET_VALUES")

                target_facet << element("_type", text: "terms")
                target_facet << element("total", text: source_facet_values.length, type: "integer")
                target_facet << (terms_array = array("terms"))

                source_facet_values.each do |source_facet_value|
                  array(terms_array) do |term|
                    term << element("term", text: source_facet_value[:KEY])
                    term << element("count", text: source_facet_value[:VALUE], type: "integer")
                  end
                end
              end
            end

            #
            # records
            #
            process "SEGMENTS/JAGROOT/RESULT/DOCSET/DOC" do |record, target|
              array(find(target, "hits/hits")) do |hit|
                # syntactic mapping: copy every section of the record below `_source`
                hit << element("_type", text: "record")
                hit << _source = element("_source") do |element|
                  record.locate("PrimoNMBib/record/?").each do |section|
                    element << section
                  end
                end

                # semantic mapping
                {
                  "display/creationdate" => "created",
                  "display/description" => "description",
                  "display/edition" => "edition",
                  "display/format" => "format",
                  "display/language" => "language",
                  "display/title" => "title",
                  "display/subject" => "subject",
                  "display/publisher" => "publisher", # may there be more than one?
                  "control/recordid" => "id"
                }
                .each_pair do |from, to|
                  find(_source, from) do |element|
                    hit << element(to, text: element.text)
                  end
                end

                # `_source` is already the context node, so the path must be
                # relative to it (like every other lookup in this block); the
                # former "_source/display/creator" never matched anything.
                find_all(_source, "display/creator").each do |creator|
                  hit << element("creator", text: creator.text)
                end

                #
                # identifier
                #
                # NOTE(review): the identifier value is used as the element
                # name and the identifier kind as its text - confirm that this
                # is intended and not swapped.
                hit << array("identifier") do |identifier|
                  # ilsApiId
                  identifier << element(find(_source, "control/ilsapiid").text, text: "ilsApiId")

                  # isbns
                  find_all(_source, "search/isbn").each do |isbn|
                    identifier << element(isbn.text, text: "isbn")
                  end

                  # recordId
                  identifier << element(find(_source, "control/recordid").text, text: "recordId")
                end
              end
            end
          end
        end

        process :document do |source, target|
          # parse the "string encoded" inner search brief return
          search_brief_return = Transformator.document_from_xml(
            find(source, "Envelope/Body/searchBriefResponse/searchBriefReturn").text,
            remove_namespaces: true,
            remove_whitespace_only_text_nodes: false
          )

          # apply the literal transformation and merge the result's nodes with target
          search_brief_return_transformation.apply(
            to: search_brief_return,
            output: :ox_document
          ).nodes.each { |node| target << node }
        end
      end
    end
  end
end
@@ -1,89 +0,0 @@
1
- require "transformator"
2
-
3
module Transformator::Examples
  # Example transformation: builds a Primo SOAP "searchBrief" request from a
  # source document that provides `from`, `size` and `query_string/query`.
  class SearchRequestTransformation
    # Forwards all arguments unchanged to the transformation built by
    # .transformation and returns its result.
    def self.apply(*args)
      transformation.apply(*args)
    end

    # Builds the transformation. The inner search request is assembled in
    # @search_request and finally embedded as CDATA into the SOAP envelope.
    def self.transformation
      Transformator::Transformation.new do
        #
        # setup outer target skeleton
        #
        process :target do |target|
          target << element_from_xml(
            <<-xml.strip_heredoc
              <env:Envelope
                xmlns:xsd="http://www.w3.org/2001/XMLSchema"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xmlns:impl="http://primo.kobv.de/PrimoWebServices/services/searcher"
                xmlns:env="http://schemas.xmlsoap.org/soap/envelope/"
                xmlns:ins0="http://xml.apache.org/xml-soap">
                <env:Body>
                  <impl:searchBrief></impl:searchBrief>
                </env:Body>
              </env:Envelope>
            xml
          )
        end

        #
        # setup inner search request that will be wrapped in a cdata element at the end
        #
        process :none do
          # we setup this skeleton instead of dynamic element creation because order matters with primo
          @search_request = element_from_xml(
            <<-xml.strip_heredoc
              <searchRequest xmlns="http://www.exlibris.com/primo/xsd/wsRequest" xmlns:uic="http://www.exlibris.com/primo/xsd/primoview/uicomponents">
                <PrimoSearchRequest xmlns="http://www.exlibris.com/primo/xsd/search/request">
                  <QueryTerms>
                    <BoolOpeator>AND</BoolOpeator>
                  </QueryTerms>
                  <StartIndex></StartIndex>
                  <BulkSize></BulkSize>
                  <DidUMeanEnabled>false</DidUMeanEnabled>
                  <HighlightingEnabled>false</HighlightingEnabled>
                  <Languages></Languages>
                  <SortByList></SortByList>
                  <Locations></Locations>
                </PrimoSearchRequest>
                <onCampus>false</onCampus>
              </searchRequest>
            xml
          )
        end

        #
        # transform source into target
        #
        process "/from" do |element|
          find(@search_request, "PrimoSearchRequest/StartIndex") << element.text
        end

        process "/size" do |element|
          find(@search_request, "PrimoSearchRequest/BulkSize") << element.text
        end

        process "//query_string" do |query_string|
          query_term = element_from_xml(
            <<-xml.strip_heredoc
              <QueryTerm>
                <IndexField></IndexField>
                <PrecisionOperator>contains</PrecisionOperator>
                <Value></Value>
              </QueryTerm>
            xml
          )

          # Append the query as a text node instead of interpolating it into
          # the XML string: interpolated text is parsed as markup, so a query
          # containing "<" or "&" would break or alter the request. `to_s`
          # keeps the former behavior of an empty <Value> for a missing text.
          find(query_term, "Value") << find(query_string, "query").text.to_s

          find(@search_request, "PrimoSearchRequest/QueryTerms") << query_term
        end

        #
        # finally, wrap the search request into a cdata element
        #
        process :target do |target|
          find(target, "//impl:searchBrief") << cdata(xml_from_element(@search_request))
        end
      end
    end
  end
end
@@ -1,93 +0,0 @@
1
- require "ox"
2
-
3
# Helper methods for building and querying Ox nodes inside transformations.
module Transformator::Dsl
  # Returns an array node. `name_or_node` is either a name (String/Symbol),
  # in which case a new element with type="array" is created, or an existing
  # Ox::Element, which is used as is.
  #
  # With a block, an accumulator responding to `<<` is yielded; everything
  # appended to it ends up in ONE new child element (named like the array)
  # that is appended to the array node.
  def array(name_or_node, &block)
    target = name_or_node.is_a?(Symbol) ? name_or_node.to_s : name_or_node

    name = target.is_a?(String) ? target : target.value
    node = target.is_a?(Ox::Element) ? target : element(name, type: "array")

    if block
      collector_class = Struct.new(:elements) do
        def <<(item)
          elements.push(item)
        end
      end
      collector = collector_class.new([])

      yield(collector)

      entry = element(name)
      collector.elements.each { |collected| entry << collected }
      node << entry
    end

    node
  end

  # Creates an Ox::CData node for `content`. Without a block the node is
  # returned; with a block the node is yielded and the block's value returned.
  def cdata(content, &block)
    new_cdata = Ox::CData.new(content)
    return new_cdata unless block

    yield(new_cdata)
  end

  # Creates an Ox::Element called `name` and returns it.
  #
  # Recognized options:
  #   :attributes - Hash of attributes (keys and values are stringified)
  #   :nodes      - a node or an Array of nodes to append
  #   :text       - appended as text (via to_s) when truthy
  #   :type       - stored in the "type" attribute (via to_s) when truthy
  #
  # A given block receives the new element before it is returned.
  def element(name, options = {}, &block)
    new_element = Ox::Element.new(name)

    attributes = options[:attributes]
    if attributes.is_a?(Hash)
      attributes.each_pair { |key, value| new_element[key.to_s] = value.to_s }
    end

    nodes = options[:nodes]
    if nodes
      nodes = [nodes] unless nodes.is_a?(Array)
      nodes.each { |child| new_element << child }
    end

    text = options[:text]
    new_element << text.to_s if text

    type = options[:type]
    new_element["type"] = type.to_s if type

    yield(new_element) if block
    new_element
  end

  # Converts `hash` to a document and returns the nodes below its root.
  def elements_from_hash(hash)
    hash_document = Transformator.document_from_hash(hash)
    hash_document.root.nodes
  end

  # Parses `xml` and returns the first top-level node.
  def element_from_xml(xml, options = {})
    top_level_nodes = elements_from_xml(xml, options)
    top_level_nodes.first
  end

  # Parses `xml` and returns all top-level nodes.
  def elements_from_xml(xml, options = {})
    parsed = Transformator.document_from_xml(xml, options)
    parsed.nodes
  end

  # Returns the first match for `path` below `node` (nil if there is none).
  # If a block is given and something matched, the match is yielded and the
  # block's value is returned instead.
  def find(node, path, &block)
    first_match = find_all(node, path).first
    return first_match unless block && first_match

    yield(first_match)
  end

  # Returns all matches for `path` below `node`. A given block is called
  # with the matches only if there is at least one.
  def find_all(node, path, &block)
    matches = node.locate(Transformator.oxify_path(path))

    yield(matches) unless block.nil? || matches.empty?
    matches
  end

  # Serializes `element` via Transformator.xml_from_document (with_xml: false).
  def xml_from_element(element)
    Transformator.xml_from_document(element, with_xml: false)
  end
end
@@ -1,27 +0,0 @@
1
# Bundles the individual format converters and provides string-level XML
# clean-up helpers used by them.
module Transformator::FormatConverter
  require_relative "./format_converter/document_from_hash"
  require_relative "./format_converter/document_from_object"
  require_relative "./format_converter/document_from_xml"
  require_relative "./format_converter/hash_from_document"
  require_relative "./format_converter/xml_from_document"

  include DocumentFromHash
  include DocumentFromObject
  include DocumentFromXml
  include HashFromDocument
  include XmlFromDocument

  # Strips namespace prefixes from element tag names in place
  # (`<ns:tag>` / `</ns:tag>` become `<tag>` / `</tag>`). Attributes and
  # xmlns declarations are not touched.
  #
  # Always returns `xml`. `String#gsub!` alone returns nil when nothing was
  # replaced, which made the result unusable and was inconsistent with
  # remove_whitespace_only_text_nodes!.
  def self.remove_namespaces!(xml)
    xml.gsub!(/<(\/?)\w+:(\w+)/, "<\\1\\2")
    xml
  end

  # Non-destructive variant of remove_whitespace_only_text_nodes!: works on
  # a copy and returns it.
  def self.remove_whitespace_only_text_nodes(xml)
    remove_whitespace_only_text_nodes!(xml.dup)
  end

  # Removes whitespace that is the only content between two tags, in place.
  # Always returns `xml`.
  def self.remove_whitespace_only_text_nodes!(xml)
    # remove whitespace only text nodes
    xml.gsub!(/>(\s|\n|\r)+</, "><")
    xml
  end
end
@@ -1,13 +0,0 @@
1
- require "active_support/core_ext/hash/conversions"
2
- require "libxml"
3
- require "transformator/format_converter/document_from_xml"
4
-
5
# Converts a Hash into a document by way of its XML serialization.
module Transformator::FormatConverter::DocumentFromHash
  include Transformator::FormatConverter::DocumentFromXml

  # Serializes `hash` with Hash#to_xml (root element "hash", type
  # attributes kept, no indentation) and parses the result with
  # document_from_xml. Sets the ActiveSupport::XmlMini backend to "LibXML"
  # on every call.
  def document_from_hash(hash)
    ActiveSupport::XmlMini.backend = "LibXML"

    serialization_options = { dasherize: false, indent: 0, root: :hash, skip_types: false }
    serialized_hash = hash.to_xml(serialization_options)

    document_from_xml(serialized_hash, remove_whitespace_only_text_nodes: false)
  end
end
@@ -1,20 +0,0 @@
1
# Converts an arbitrary supported object into a document.
module Transformator::FormatConverter::DocumentFromObject
  # Dispatches on the format reported by Transformator.determine_format:
  #
  #   :hash        - converted with Transformator.document_from_hash
  #   :json        - converted with Transformator.document_from_json
  #   :ox_document - returned unchanged
  #   :xml         - parsed with namespaces removed, whitespace kept
  #   nil          - a new, empty document is returned
  #
  # Any other format yields nil. `options` is accepted but not used.
  def document_from_object(obj, options = {})
    format = Transformator.determine_format(obj)

    if format == :hash
      Transformator.document_from_hash(obj)
    elsif format == :json
      Transformator.document_from_json(obj)
    elsif format == :ox_document
      obj
    elsif format == :xml
      xml_options = { remove_whitespace_only_text_nodes: false, remove_namespaces: true }
      Transformator.document_from_xml(obj, xml_options)
    elsif format.nil?
      Ox::Document.new(version: "1.0", encoding: "UTF-8")
    end
  end
end
@@ -1,28 +0,0 @@
1
# Parses XML strings into Ox documents.
module Transformator::FormatConverter::DocumentFromXml
  # Parses `xml` and returns an Ox document.
  #
  # Options:
  #   :remove_whitespace_only_text_nodes - whitespace between tags is
  #       removed unless this is explicitly `false`
  #   :remove_namespaces - namespace prefixes of element names are removed
  #       only if this is exactly `true`
  #
  # Input without an XML declaration is treated as a fragment: its
  # top-level nodes are collected in a new document.
  #
  # The caller's string is never modified.
  def document_from_xml(xml, options = {})
    strip_whitespace = options[:remove_whitespace_only_text_nodes] != false
    strip_namespaces = options[:remove_namespaces] == true

    # Both clean-up helpers work in place, so copy the input whenever one of
    # them is going to run. Previously the copy was skipped for the default
    # options although whitespace removal still ran, which mutated the
    # caller's string (and raised on frozen strings).
    xml = xml.dup if strip_whitespace || strip_namespaces

    Transformator::FormatConverter.remove_whitespace_only_text_nodes!(xml) if strip_whitespace
    Transformator::FormatConverter.remove_namespaces!(xml) if strip_namespaces

    if xml[/\A\s*<\?xml/]
      Ox.parse(xml)
    else
      Ox::Document.new(version: "1.0", encoding: "UTF-8").tap do |new_document|
        # Interpolate instead of appending to the "<root>" literal so this
        # also works when string literals are frozen.
        Ox.parse("<root>#{xml}</root>").nodes.each do |node|
          new_document << node
        end
      end
    end
  end
end
@@ -1,61 +0,0 @@
1
- require "ox"
2
-
3
# Converts an Ox document into a plain Ruby Hash.
module Transformator::FormatConverter::HashFromDocument
  # Returns the Hash representation of `document`. If the root element is
  # not called "hash", the document's nodes are first put below a new
  # "hash" root element in a new document.
  def hash_from_document(document)
    unless document.root.value == "hash"
      hash_container_document = Ox::Document.new
      hash_root_element = Ox::Element.new("hash")
      hash_container_document << hash_root_element
      document.nodes.each { |node| hash_root_element << node }

      document = hash_container_document
    end

    converted = Transformator::FormatConverter::HashFromDocument.process_node(document.root, {})
    unwrapped = converted["hash"]
    unwrapped.nil? ? converted : unwrapped
  end

  # Converts `node` to a value, stores it in `hash` under the node's name
  # and returns `hash`.
  #
  # * Nodes whose children are all Strings become scalars; the "type"
  #   attribute selects integer/float/boolean conversion, and nil="true"
  #   yields nil for untyped nodes.
  # * Nodes with type="array" become an Array built from the children that
  #   are named like the node itself.
  # * All other nodes become a nested Hash.
  #
  # If `hash` already holds a value for the name, the values are collected
  # in an Array.
  def self.process_node(node, hash)
    key = node.value
    children = node.nodes

    value =
      if children.all? { |child| child.is_a?(String) }
        text = children.join

        case node[:type]
        when "integer" then text.to_i
        when "float" then text.to_f
        when "boolean" then text.downcase == "true"
        else
          node[:nil] == "true" ? nil : text
        end
      elsif node[:type] == "array"
        node.locate(key).map do |item_node|
          item = process_node(item_node, {})
          # unwrap the single-key hash produced for the item element
          item.is_a?(Hash) && item.keys == [key] ? item.values.first : item
        end
      else
        children.each_with_object({}) do |child, nested|
          process_node(child, nested)
        end
      end

    existing = hash[key]
    hash[key] =
      if existing.nil?
        value
      elsif existing.is_a?(Array)
        existing << value
      else
        [existing, value]
      end

    hash
  end
end