transformator 0.1.4 → 1.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -2
  3. data/Rakefile +0 -43
  4. data/bin/transformator +4 -0
  5. data/lib/transformator.rb +13 -29
  6. data/lib/transformator/cli.rb +42 -0
  7. data/lib/transformator/dispatcher.rb +43 -0
  8. data/lib/transformator/filesystem_pattern_evaluator.rb +14 -0
  9. data/lib/transformator/gzip_reader.rb +0 -0
  10. data/lib/transformator/gzip_writer.rb +0 -0
  11. data/lib/transformator/null_processor.rb +9 -0
  12. data/lib/transformator/oga_xml_dumper.rb +10 -0
  13. data/lib/transformator/oga_xml_parser.rb +10 -0
  14. data/lib/transformator/ox_xml_dumper.rb +10 -0
  15. data/lib/transformator/ox_xml_parser.rb +10 -0
  16. data/lib/transformator/parallel_processor.rb +39 -0
  17. data/lib/transformator/processor.rb +4 -0
  18. data/lib/transformator/tar_reader.rb +51 -0
  19. data/lib/transformator/tar_reader/patched_rubygems_tar_reader.rb +41 -0
  20. data/lib/transformator/tar_writer.rb +0 -0
  21. data/lib/transformator/version.rb +1 -1
  22. data/spec/spec_helper.rb +0 -1
  23. data/spec/transformator_spec.rb +0 -8
  24. data/transformator.gemspec +7 -6
  25. data/ubpb.yml.erb +25 -0
  26. metadata +49 -67
  27. data/assets/primo_search_response.xml +0 -2878
  28. data/assets/primo_search_response_1.xml +0 -2467
  29. data/examples/primo_search_response_transformation.rb +0 -123
  30. data/examples/search_request_transformation.rb +0 -89
  31. data/lib/transformator/dsl.rb +0 -93
  32. data/lib/transformator/format_converter.rb +0 -27
  33. data/lib/transformator/format_converter/document_from_hash.rb +0 -13
  34. data/lib/transformator/format_converter/document_from_object.rb +0 -20
  35. data/lib/transformator/format_converter/document_from_xml.rb +0 -28
  36. data/lib/transformator/format_converter/hash_from_document.rb +0 -61
  37. data/lib/transformator/format_converter/xml_from_document.rb +0 -7
  38. data/lib/transformator/transformation.rb +0 -91
  39. data/spec/examples/primo_search_response_transformation_spec.rb +0 -19
  40. data/spec/examples/search_request_transformation_spec.rb +0 -48
  41. data/spec/transformator/dsl_spec.rb +0 -187
  42. data/spec/transformator/format_converter/hash_from_document_spec.rb +0 -42
  43. data/spec/transformator/transformation_spec.rb +0 -112
@@ -1,123 +0,0 @@
1
- require "transformator"
2
-
3
module Transformator::Examples
  # Example transformation: turns a Primo SOAP search response into a target
  # document made of +took+, +hits/hits+ and +facets+ sections.
  class PrimoSearchResponseTransformation
    # Builds a transformation and applies it, forwarding all arguments
    # unchanged to its +apply+ method.
    def self.apply(*args)
      transformation.apply(*args)
    end

    # Returns a newly built Transformator::Transformation on every call.
    def self.transformation
      Transformator::Transformation.new do
        # Memoized inner transformation that handles the search brief return
        # document embedded (as a string) in the SOAP response.
        def search_brief_return_transformation
          @search_brief_return_transformation ||= Transformator::Transformation.new do
            #
            # setup target skeleton
            #
            process :document do |source, target|
              target_skeleton = {
                took: nil,
                hits: {
                  hits: []
                },
                facets: {}
              }

              elements_from_hash(target_skeleton).each do |skeleton_element|
                target << skeleton_element
              end
            end

            #
            # facets
            #
            process "SEGMENTS/JAGROOT/RESULT/FACETLIST/FACET" do |source_facet, target|
              # syntactic mapping
              facet_element = element(source_facet["NAME"]) do |target_facet|
                source_facet_values = find_all(source_facet, "FACET_VALUES")

                target_facet << element("_type", text: "terms")
                target_facet << element("total", text: source_facet_values.length, type: "integer")
                target_facet << (terms_array = array("terms"))

                source_facet_values.each do |source_facet_value|
                  array(terms_array) do |term|
                    term << element("term", text: source_facet_value[:KEY])
                    term << element("count", text: source_facet_value[:VALUE], type: "integer")
                  end
                end
              end

              find(target, "facets") << facet_element
            end

            #
            # records
            #
            process "SEGMENTS/JAGROOT/RESULT/DOCSET/DOC" do |record, target|
              array(find(target, "hits/hits")) do |hit|
                # syntactic mapping
                hit << element("_type", text: "record")

                # Block parameters are deliberately not called +element+, so
                # they cannot be confused with the DSL method of that name.
                _source = element("_source") do |source_element|
                  record.locate("PrimoNMBib/record/?").each do |section|
                    source_element << section
                  end
                end
                hit << _source

                # semantic mapping
                {
                  "display/creationdate" => "created",
                  "display/description" => "description",
                  "display/edition" => "edition",
                  "display/format" => "format",
                  "display/language" => "language",
                  "display/title" => "title",
                  "display/subject" => "subject",
                  "display/publisher" => "publisher", # may there be more than one?
                  "control/recordid" => "id"
                }
                .each_pair do |from, to|
                  find(_source, from) do |source_field|
                    hit << element(to, text: source_field.text)
                  end
                end

                # The path is relative to +_source+, like every other lookup on
                # that node; the former "_source/display/creator" prefix named
                # the node itself again.
                # NOTE(review): assumes Transformator.oxify_path keeps relative
                # paths relative - confirm.
                find_all(_source, "display/creator").each do |creator|
                  hit << element("creator", text: creator.text)
                end

                #
                # identifier
                #
                hit << array("identifier") do |identifier|
                  # The block form of +find+ only yields when a node was found,
                  # so a record without one of these ids is skipped instead of
                  # raising NoMethodError on nil.

                  # ilsApiId
                  find(_source, "control/ilsapiid") do |ils_api_id|
                    identifier << element(ils_api_id.text, text: "ilsApiId")
                  end

                  # isbns
                  find_all(_source, "search/isbn").each do |isbn|
                    identifier << element(isbn.text, text: "isbn")
                  end

                  # recordId
                  find(_source, "control/recordid") do |record_id|
                    identifier << element(record_id.text, text: "recordId")
                  end
                end
              end
            end
          end
        end

        process :document do |source, target|
          # parse the "string encoded" inner search brief return
          search_brief_return = Transformator.document_from_xml(
            find(source, "Envelope/Body/searchBriefResponse/searchBriefReturn").text,
            remove_namespaces: true,
            remove_whitespace_only_text_nodes: false
          )

          # apply the literal transformation and merge the result's nodes with target
          search_brief_return_transformation.apply(
            to: search_brief_return,
            output: :ox_document
          ).nodes.each { |node| target << node }
        end
      end
    end
  end
end
@@ -1,89 +0,0 @@
1
- require "transformator"
2
-
3
module Transformator::Examples
  # Example transformation: turns a search request (with +from+, +size+ and
  # +query_string+ elements) into a Primo +searchBrief+ SOAP envelope whose
  # payload is the serialized search request wrapped in a CDATA node.
  class SearchRequestTransformation
    # Builds a transformation and applies it, forwarding all arguments
    # unchanged to its +apply+ method.
    def self.apply(*args)
      transformation.apply(*args)
    end

    # Returns a newly built Transformator::Transformation on every call.
    def self.transformation
      Transformator::Transformation.new do
        #
        # setup outer target skeleton
        #
        process :target do |target|
          target << element_from_xml(
            <<-xml.strip_heredoc
              <env:Envelope
                xmlns:xsd="http://www.w3.org/2001/XMLSchema"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xmlns:impl="http://primo.kobv.de/PrimoWebServices/services/searcher"
                xmlns:env="http://schemas.xmlsoap.org/soap/envelope/"
                xmlns:ins0="http://xml.apache.org/xml-soap">
                <env:Body>
                  <impl:searchBrief></impl:searchBrief>
                </env:Body>
              </env:Envelope>
            xml
          )
        end

        #
        # setup inner search request that will be wrapped in a cdata element at the end
        #
        process :none do
          # we setup this skeleton instead of dynamic element creation because order matters with primo
          @search_request = element_from_xml(
            <<-xml.strip_heredoc
              <searchRequest xmlns="http://www.exlibris.com/primo/xsd/wsRequest" xmlns:uic="http://www.exlibris.com/primo/xsd/primoview/uicomponents">
                <PrimoSearchRequest xmlns="http://www.exlibris.com/primo/xsd/search/request">
                  <QueryTerms>
                    <BoolOpeator>AND</BoolOpeator>
                  </QueryTerms>
                  <StartIndex></StartIndex>
                  <BulkSize></BulkSize>
                  <DidUMeanEnabled>false</DidUMeanEnabled>
                  <HighlightingEnabled>false</HighlightingEnabled>
                  <Languages></Languages>
                  <SortByList></SortByList>
                  <Locations></Locations>
                </PrimoSearchRequest>
                <onCampus>false</onCampus>
              </searchRequest>
            xml
          )
        end

        #
        # transform source into target
        #
        process "/from" do |from|
          find(@search_request, "PrimoSearchRequest/StartIndex") << from.text
        end

        process "/size" do |size|
          find(@search_request, "PrimoSearchRequest/BulkSize") << size.text
        end

        process "//query_string" do |query_string|
          # The query text is attached as a text node instead of being
          # interpolated into XML source, so characters such as "&" or "<" in
          # the user's query can no longer break parsing or inject markup.
          # NOTE(review): relies on the serializer escaping text nodes - confirm.
          query_term = element("QueryTerm") do |term|
            term << element("IndexField")
            term << element("PrecisionOperator", text: "contains")
            term << element("Value", text: find(query_string, "query").text)
          end

          find(@search_request, "PrimoSearchRequest/QueryTerms") << query_term
        end

        #
        # finally, wrap the search request into a cdata element
        #
        process :target do |target|
          find(target, "//impl:searchBrief") << cdata(xml_from_element(@search_request))
        end
      end
    end
  end
end
@@ -1,93 +0,0 @@
1
- require "ox"
2
-
3
# Helper methods for building and querying Ox nodes inside transformations.
module Transformator::Dsl
  # Returns an array node. +name_or_node+ may be a Symbol or String (a new
  # element with type="array" is created) or an existing Ox::Element (reused).
  # With a block, the block receives an accumulator responding to +<<+; the
  # collected nodes are wrapped in one new element named like the array and
  # appended to the array node.
  def array(name_or_node, &block)
    target = name_or_node.is_a?(Symbol) ? name_or_node.to_s : name_or_node

    name = target.is_a?(String) ? target : target.value
    node = target.is_a?(Ox::Element) ? target : element(name, type: "array")
    return node unless block

    accumulator_class = Struct.new(:elements) do
      def <<(item)
        elements.push(item)
      end
    end
    accumulator = accumulator_class.new([])

    block.call(accumulator)

    item_element = element(name) do |wrapper|
      accumulator.elements.each { |item| wrapper << item }
    end
    node << item_element

    node
  end

  # Creates an Ox::CData node. Without a block the node is returned; with a
  # block the node is yielded and the block's result is returned.
  def cdata(content, &block)
    new_cdata = Ox::CData.new(content)
    return new_cdata unless block

    block.call(new_cdata)
  end

  # Creates an Ox::Element called +name+. Recognized options, applied in this
  # order: :attributes (Hash), :nodes (single node or Array), :text, :type.
  # The element is yielded to the block, if any, and then returned.
  def element(name, options = {}, &block)
    new_element = Ox::Element.new(name)

    attributes = options[:attributes]
    if attributes.is_a?(Hash)
      attributes.each_pair { |key, value| new_element[key.to_s] = value.to_s }
    end

    nodes = options[:nodes]
    if nodes
      node_list = nodes.is_a?(Array) ? nodes : [nodes]
      node_list.each { |child| new_element << child }
    end

    text = options[:text]
    new_element << text.to_s if text

    type = options[:type]
    new_element["type"] = type.to_s if type

    block.call(new_element) if block
    new_element
  end

  # Returns the child nodes of the root of the document built from +hash+.
  def elements_from_hash(hash)
    document = Transformator.document_from_hash(hash)
    document.root.nodes
  end

  # Returns the first node parsed from +xml+.
  def element_from_xml(xml, options = {})
    parsed_nodes = elements_from_xml(xml, options)
    parsed_nodes.first
  end

  # Returns all top-level nodes of the document parsed from +xml+.
  def elements_from_xml(xml, options = {})
    document = Transformator.document_from_xml(xml, options)
    document.nodes
  end

  # Returns the first match of +path+ below +node+ (or nil). If a block is
  # given and something was found, the match is yielded and the block's
  # result is returned instead.
  def find(node, path, &block)
    match = find_all(node, path).first
    return match unless block && match

    block.call(match)
  end

  # Returns all matches of +path+ below +node+. A given block is called with
  # the matches, but only when there is at least one.
  def find_all(node, path, &block)
    ox_path = Transformator.oxify_path(path)
    matches = node.locate(ox_path)

    block.call(matches) unless block.nil? || matches.empty?
    matches
  end

  # Serializes +element+ via Transformator.xml_from_document (with_xml: false).
  def xml_from_element(element)
    serialization_options = { with_xml: false }
    Transformator.xml_from_document(element, serialization_options)
  end
end
@@ -1,27 +0,0 @@
1
# Bundles the individual format converters and offers string-level XML
# clean-up helpers used before parsing.
module Transformator::FormatConverter
  require_relative "./format_converter/document_from_hash"
  require_relative "./format_converter/document_from_object"
  require_relative "./format_converter/document_from_xml"
  require_relative "./format_converter/hash_from_document"
  require_relative "./format_converter/xml_from_document"

  include DocumentFromHash
  include DocumentFromObject
  include DocumentFromXml
  include HashFromDocument
  include XmlFromDocument

  # Strips namespace prefixes from opening and closing tags of +xml+ in place
  # (e.g. "<env:Body>" becomes "<Body>"). Attributes are not touched.
  #
  # Always returns +xml+. String#gsub! yields nil when nothing was replaced,
  # so the string is returned explicitly, matching
  # remove_whitespace_only_text_nodes!. Prefixes may contain "-" and ".",
  # which are legal in XML names but are not matched by \w.
  def self.remove_namespaces!(xml)
    xml.gsub!(/<(\/?)[\w.-]+:(\w+)/, "<\\1\\2")
    xml
  end

  # Non-destructive variant of remove_whitespace_only_text_nodes!: works on a
  # copy and returns it.
  def self.remove_whitespace_only_text_nodes(xml)
    remove_whitespace_only_text_nodes!(xml.dup)
  end

  # Removes whitespace-only runs between a closing ">" and the next "<" of
  # +xml+ in place and returns +xml+.
  def self.remove_whitespace_only_text_nodes!(xml)
    # \s already covers \n and \r
    xml.gsub!(/>\s+</, "><")
    xml
  end
end
@@ -1,13 +0,0 @@
1
- require "active_support/core_ext/hash/conversions"
2
- require "libxml"
3
- require "transformator/format_converter/document_from_xml"
4
-
5
# Converts a Hash into a document by serializing it to XML and parsing that.
module Transformator::FormatConverter::DocumentFromHash
  include Transformator::FormatConverter::DocumentFromXml

  # Serializes +hash+ to XML (root element "hash", type attributes kept, no
  # indentation, no dasherizing) and parses the result with document_from_xml.
  # Sets ActiveSupport::XmlMini.backend to "LibXML" on every call.
  def document_from_hash(hash)
    ActiveSupport::XmlMini.backend = "LibXML"

    serialization_options = { dasherize: false, indent: 0, root: :hash, skip_types: false }
    document_from_xml(
      hash.to_xml(serialization_options),
      remove_whitespace_only_text_nodes: false
    )
  end
end
@@ -1,20 +0,0 @@
1
# Converts an arbitrary supported object into a document.
module Transformator::FormatConverter::DocumentFromObject
  # Dispatches on Transformator.determine_format(obj):
  #   :hash        -> Transformator.document_from_hash
  #   :json        -> Transformator.document_from_json
  #   :ox_document -> +obj+ itself
  #   :xml         -> Transformator.document_from_xml (namespaces removed,
  #                   whitespace-only text nodes kept)
  #   nil          -> a new, empty Ox::Document
  # Any other format yields nil. +options+ is accepted but not used.
  def document_from_object(obj, options = {})
    format = Transformator.determine_format(obj)

    if :hash == format
      Transformator.document_from_hash(obj)
    elsif :json == format
      Transformator.document_from_json(obj)
    elsif :ox_document == format
      obj
    elsif :xml == format
      xml_options = { remove_whitespace_only_text_nodes: false, remove_namespaces: true }
      Transformator.document_from_xml(obj, xml_options)
    elsif nil == format
      Ox::Document.new(version: "1.0", encoding: "UTF-8")
    end
  end
end
@@ -1,28 +0,0 @@
1
# Parses XML strings into Ox documents.
module Transformator::FormatConverter::DocumentFromXml
  # Parses +xml+ and returns the result.
  #
  # Options:
  #   :remove_whitespace_only_text_nodes - whitespace between tags is removed
  #     unless this is explicitly +false+ (i.e. it is on by default)
  #   :remove_namespaces - namespace prefixes are removed from tags only when
  #     this is exactly +true+
  #
  # If +xml+ starts with an XML declaration it is handed to Ox.parse directly.
  # Otherwise it is treated as a fragment: it is parsed inside a temporary
  # <root> element and the resulting nodes are moved into a new Ox::Document.
  #
  # The caller's string is never modified: a copy is made whenever one of the
  # in-place clean-up steps is going to run. (Previously the copy was skipped
  # when no option was given, although whitespace removal runs by default.)
  def document_from_xml(xml, options = {})
    strip_whitespace = options[:remove_whitespace_only_text_nodes] != false
    strip_namespaces = options[:remove_namespaces] == true

    xml = xml.dup if strip_whitespace || strip_namespaces

    Transformator::FormatConverter.remove_whitespace_only_text_nodes!(xml) if strip_whitespace
    Transformator::FormatConverter.remove_namespaces!(xml) if strip_namespaces

    if xml[/\A\s*<\?xml/]
      Ox.parse(xml)
    else
      Ox::Document.new(version: "1.0", encoding: "UTF-8").tap do |new_document|
        # Interpolation instead of "<root>" << ...: does not mutate a string
        # literal, so it also works with frozen string literals.
        Ox.parse("<root>#{xml}</root>").nodes.each do |node|
          new_document << node
        end
      end
    end
  end
end
@@ -1,61 +0,0 @@
1
- require "ox"
2
-
3
# Converts Ox documents into plain Ruby hashes.
module Transformator::FormatConverter::HashFromDocument
  # Returns the hash representation of +document+. A document whose root
  # element is not called "hash" is first wrapped: all of its nodes are put
  # below a new "hash" element in a new document. The outer "hash" key is
  # dropped from the result unless its value is nil.
  def hash_from_document(document)
    unless document.root.value == "hash"
      wrapper_root = Ox::Element.new("hash")
      wrapper_document = Ox::Document.new
      wrapper_document << wrapper_root
      document.nodes.each { |child| wrapper_root << child }
      document = wrapper_document
    end

    result = Transformator::FormatConverter::HashFromDocument.process_node(document.root, {})
    unwrapped = result["hash"]
    unwrapped.nil? ? result : unwrapped
  end

  # Converts +node+ to a value, stores it in +hash+ under the node's name and
  # returns +hash+.
  #
  # * Nodes whose children are all Strings (including no children) become
  #   scalars, cast according to the "type" attribute ("integer", "float",
  #   "boolean"); otherwise nil if the "nil" attribute is "true", else the
  #   joined text.
  # * Nodes with type="array" become an Array built from their children named
  #   like the node itself.
  # * Any other node becomes a Hash of its recursively processed children.
  #
  # If the key already holds a non-nil value, values are collected in an
  # Array: appended to an existing Array, or paired with the existing value.
  def self.process_node(node, hash)
    children = node.nodes
    only_text = children.all? { |child| child.is_a?(String) }

    value =
      if only_text
        type = node[:type]

        if type == "integer"
          children.join.to_i
        elsif type == "float"
          children.join.to_f
        elsif type == "boolean"
          children.join.downcase == "true"
        elsif node[:nil] == "true"
          nil
        else
          children.join
        end
      elsif node[:type] == "array"
        node.locate(node.value).map do |item_node|
          item = process_node(item_node, {})
          # unwrap { name => value } items that only repeat the array's name
          single_wrapper = item.is_a?(Hash) && item.keys == [node.value]
          single_wrapper ? item.values.first : item
        end
      else
        children.each_with_object({}) do |child, children_hash|
          process_node(child, children_hash)
        end
      end

    key = node.value
    existing = hash[key]

    hash[key] =
      case
      when existing.nil? then value
      when existing.is_a?(Array) then existing << value
      else [existing, value]
      end

    hash
  end
end