transformator 0.1.4 → 1.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +7 -2
- data/Rakefile +0 -43
- data/bin/transformator +4 -0
- data/lib/transformator.rb +13 -29
- data/lib/transformator/cli.rb +42 -0
- data/lib/transformator/dispatcher.rb +43 -0
- data/lib/transformator/filesystem_pattern_evaluator.rb +14 -0
- data/lib/transformator/gzip_reader.rb +0 -0
- data/lib/transformator/gzip_writer.rb +0 -0
- data/lib/transformator/null_processor.rb +9 -0
- data/lib/transformator/oga_xml_dumper.rb +10 -0
- data/lib/transformator/oga_xml_parser.rb +10 -0
- data/lib/transformator/ox_xml_dumper.rb +10 -0
- data/lib/transformator/ox_xml_parser.rb +10 -0
- data/lib/transformator/parallel_processor.rb +39 -0
- data/lib/transformator/processor.rb +4 -0
- data/lib/transformator/tar_reader.rb +51 -0
- data/lib/transformator/tar_reader/patched_rubygems_tar_reader.rb +41 -0
- data/lib/transformator/tar_writer.rb +0 -0
- data/lib/transformator/version.rb +1 -1
- data/spec/spec_helper.rb +0 -1
- data/spec/transformator_spec.rb +0 -8
- data/transformator.gemspec +7 -6
- data/ubpb.yml.erb +25 -0
- metadata +49 -67
- data/assets/primo_search_response.xml +0 -2878
- data/assets/primo_search_response_1.xml +0 -2467
- data/examples/primo_search_response_transformation.rb +0 -123
- data/examples/search_request_transformation.rb +0 -89
- data/lib/transformator/dsl.rb +0 -93
- data/lib/transformator/format_converter.rb +0 -27
- data/lib/transformator/format_converter/document_from_hash.rb +0 -13
- data/lib/transformator/format_converter/document_from_object.rb +0 -20
- data/lib/transformator/format_converter/document_from_xml.rb +0 -28
- data/lib/transformator/format_converter/hash_from_document.rb +0 -61
- data/lib/transformator/format_converter/xml_from_document.rb +0 -7
- data/lib/transformator/transformation.rb +0 -91
- data/spec/examples/primo_search_response_transformation_spec.rb +0 -19
- data/spec/examples/search_request_transformation_spec.rb +0 -48
- data/spec/transformator/dsl_spec.rb +0 -187
- data/spec/transformator/format_converter/hash_from_document_spec.rb +0 -42
- data/spec/transformator/transformation_spec.rb +0 -112
@@ -1,123 +0,0 @@
|
|
1
|
-
require "transformator"
|
2
|
-
|
3
|
-
module Transformator::Examples
  # Example transformation: takes a Primo SOAP search response and produces a
  # result document with +took+, +hits+ and +facets+ sections.
  class PrimoSearchResponseTransformation
    # Builds a new transformation and forwards all arguments to its +apply+.
    def self.apply(*args)
      transformation.apply(*args)
    end

    # Returns a new Transformator::Transformation. The outer transformation
    # extracts the string-encoded inner search result from the SOAP envelope
    # and delegates the actual mapping to an inner transformation.
    def self.transformation
      Transformator::Transformation.new do
        # Inner transformation for the decoded searchBriefReturn document;
        # memoized in an instance variable after the first call.
        def search_brief_return_transformation
          @search_brief_return_transformation ||= Transformator::Transformation.new do
            #
            # set up the target skeleton
            #
            process :document do |source, target|
              skeleton = { took: nil, hits: { hits: [] }, facets: {} }

              elements_from_hash(skeleton).each { |node| target << node }
            end

            #
            # facets
            #
            process "SEGMENTS/JAGROOT/RESULT/FACETLIST/FACET" do |source_facet, target|
              # syntactic mapping: one target element per source facet, named after it
              find(target, "facets") << element(source_facet["NAME"]) do |target_facet|
                facet_values = find_all(source_facet, "FACET_VALUES")
                terms = array("terms")

                target_facet << element("_type", text: "terms")
                target_facet << element("total", text: facet_values.length, type: "integer")
                target_facet << terms

                facet_values.each do |facet_value|
                  array(terms) do |term|
                    term << element("term", text: facet_value[:KEY])
                    term << element("count", text: facet_value[:VALUE], type: "integer")
                  end
                end
              end
            end

            #
            # records
            #
            process "SEGMENTS/JAGROOT/RESULT/DOCSET/DOC" do |record, target|
              array(find(target, "hits/hits")) do |hit|
                # syntactic mapping: copy every section of the record below "_source"
                hit << element("_type", text: "record")

                source_node = element("_source") do |container|
                  record.locate("PrimoNMBib/record/?").each { |section| container << section }
                end
                hit << source_node

                # semantic mapping: source path below "_source" => name of the hit field
                field_mapping = {
                  "display/creationdate" => "created",
                  "display/description" => "description",
                  "display/edition" => "edition",
                  "display/format" => "format",
                  "display/language" => "language",
                  "display/title" => "title",
                  "display/subject" => "subject",
                  "display/publisher" => "publisher", # may there be more than one?
                  "control/recordid" => "id"
                }

                field_mapping.each_pair do |from, to|
                  find(source_node, from) { |found| hit << element(to, text: found.text) }
                end

                # NOTE(review): unlike the paths above, this one starts with
                # "_source/" although the lookup already starts at the _source
                # element -- confirm that creators are actually found.
                find_all(source_node, "_source/display/creator").each do |creator|
                  hit << element("creator", text: creator.text)
                end

                #
                # identifier
                #
                # each identifier element is named after the identifier value and
                # carries the identifier kind as its text
                hit << array("identifier") do |identifier|
                  # ilsApiId
                  identifier << element(find(source_node, "control/ilsapiid").text, text: "ilsApiId")

                  # isbns
                  find_all(source_node, "search/isbn").each do |isbn|
                    identifier << element(isbn.text, text: "isbn")
                  end

                  # recordId
                  identifier << element(find(source_node, "control/recordid").text, text: "recordId")
                end
              end
            end
          end
        end

        process :document do |source, target|
          # parse the "string encoded" inner search brief return
          encoded_result = find(source, "Envelope/Body/searchBriefResponse/searchBriefReturn").text
          search_brief_return = Transformator.document_from_xml(
            encoded_result,
            remove_namespaces: true,
            remove_whitespace_only_text_nodes: false
          )

          # apply the inner transformation and merge the result's nodes into target
          inner_result = search_brief_return_transformation.apply(
            to: search_brief_return,
            output: :ox_document
          )
          inner_result.nodes.each { |node| target << node }
        end
      end
    end
  end
end
|
@@ -1,89 +0,0 @@
|
|
1
|
-
require "transformator"
|
2
|
-
|
3
|
-
module Transformator::Examples
  # Example transformation: turns a search request document (+from+, +size+,
  # +query_string+) into a SOAP envelope whose +impl:searchBrief+ element
  # carries a Primo search request wrapped in a CDATA section.
  class SearchRequestTransformation
    # Builds a new transformation and forwards all arguments to its +apply+.
    def self.apply(*args)
      transformation.apply(*args)
    end

    # Returns a new Transformator::Transformation implementing the mapping.
    def self.transformation
      Transformator::Transformation.new do
        #
        # set up outer target skeleton
        #
        process :target do |target|
          target << element_from_xml(
            <<-xml.strip_heredoc
              <env:Envelope
                xmlns:xsd="http://www.w3.org/2001/XMLSchema"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xmlns:impl="http://primo.kobv.de/PrimoWebServices/services/searcher"
                xmlns:env="http://schemas.xmlsoap.org/soap/envelope/"
                xmlns:ins0="http://xml.apache.org/xml-soap">
                <env:Body>
                  <impl:searchBrief></impl:searchBrief>
                </env:Body>
              </env:Envelope>
            xml
          )
        end

        #
        # set up inner search request that will be wrapped in a cdata element at the end
        #
        process :none do
          # we set up this skeleton instead of dynamic element creation because order matters with primo
          # NOTE(review): "BoolOpeator" looks like a typo but is kept byte-for-byte;
          # presumably it matches the element name the service expects -- confirm.
          @search_request = element_from_xml(
            <<-xml.strip_heredoc
              <searchRequest xmlns="http://www.exlibris.com/primo/xsd/wsRequest" xmlns:uic="http://www.exlibris.com/primo/xsd/primoview/uicomponents">
                <PrimoSearchRequest xmlns="http://www.exlibris.com/primo/xsd/search/request">
                  <QueryTerms>
                    <BoolOpeator>AND</BoolOpeator>
                  </QueryTerms>
                  <StartIndex></StartIndex>
                  <BulkSize></BulkSize>
                  <DidUMeanEnabled>false</DidUMeanEnabled>
                  <HighlightingEnabled>false</HighlightingEnabled>
                  <Languages></Languages>
                  <SortByList></SortByList>
                  <Locations></Locations>
                </PrimoSearchRequest>
                <onCampus>false</onCampus>
              </searchRequest>
            xml
          )
        end

        #
        # transform source into target
        #
        process "/from" do |element|
          find(@search_request, "PrimoSearchRequest/StartIndex") << element.text
        end

        process "/size" do |element|
          find(@search_request, "PrimoSearchRequest/BulkSize") << element.text
        end

        process "//query_string" do |query_string|
          # The query text is spliced into an XML fragment that is parsed right
          # away, so "&", "<" and ">" must be escaped; otherwise a query
          # containing them breaks or alters the generated request.
          query_value = find(query_string, "query").text.to_s.encode(xml: :text)

          find(@search_request, "PrimoSearchRequest/QueryTerms") << element_from_xml(
            <<-xml.strip_heredoc
              <QueryTerm>
                <IndexField></IndexField>
                <PrecisionOperator>contains</PrecisionOperator>
                <Value>#{query_value}</Value>
              </QueryTerm>
            xml
          )
        end

        #
        # finally, wrap the search request into a cdata element
        #
        process :target do |target|
          find(target, "//impl:searchBrief") << cdata(xml_from_element(@search_request))
        end
      end
    end
  end
end
|
data/lib/transformator/dsl.rb
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
require "ox"
|
2
|
-
|
3
|
-
# Helper methods for building and querying Ox nodes inside transformations.
module Transformator::Dsl
  # Receiver handed to the block of #array: collects everything appended with
  # +<<+ so the items can be wrapped in a single array item element afterwards.
  # Defined once here instead of creating a new Struct class on every call.
  AppendAccumulator = Struct.new(:elements) do
    def <<(element)
      elements.push(element)
    end
  end

  # Creates an array element (+type="array"+) or adds an item to an existing one.
  #
  # @param name_or_node [String, Symbol, Ox::Element] the name of a new array
  #   element, or an existing element to append an item to
  # @yield [accumulator] if a block is given, everything appended to the
  #   accumulator becomes the content of one new item element, which is named
  #   like the array itself and appended to it
  # @return [Ox::Element] the array element
  def array(name_or_node, &block)
    name_or_node = name_or_node.to_s if name_or_node.is_a?(Symbol) # eliminate "symbol"-case

    name = name_or_node.is_a?(String) ? name_or_node : name_or_node.value
    node = name_or_node.is_a?(Ox::Element) ? name_or_node : element(name, type: "array")

    if block
      append_accumulator = AppendAccumulator.new([])

      yield(append_accumulator)

      node << element(name) do |array_element|
        append_accumulator.elements.each do |collected|
          array_element << collected
        end
      end
    end

    node
  end

  # Creates an Ox::CData node with the given content.
  #
  # @return the block's result if a block is given, otherwise the new node
  def cdata(content, &block)
    new_cdata = Ox::CData.new(content)
    block ? yield(new_cdata) : new_cdata
  end

  # Creates an Ox::Element with the given name.
  #
  # @param name [String] the element name
  # @param options [Hash] optional content:
  #   +:attributes+ (Hash, keys and values are stringified),
  #   +:nodes+ (a node or an array of nodes to append),
  #   +:text+ (appended as string; skipped when nil or false),
  #   +:type+ (written to the "type" attribute)
  # @yield [element] the new element, for further population
  # @return [Ox::Element] the new element
  def element(name, options = {}, &block)
    new_element = Ox::Element.new(name)

    if (attributes = options[:attributes]).is_a?(Hash)
      attributes.each_pair do |key, value|
        new_element[key.to_s] = value.to_s
      end
    end

    if (nodes = options[:nodes])
      (nodes.is_a?(Array) ? nodes : [nodes]).each do |node|
        new_element << node
      end
    end

    if (text = options[:text])
      new_element << text.to_s
    end

    if (type = options[:type])
      new_element["type"] = type.to_s
    end

    yield(new_element) if block
    new_element
  end

  # Converts a hash into a document and returns the child nodes of its root.
  def elements_from_hash(hash)
    Transformator.document_from_hash(hash).root.nodes
  end

  # Parses the given XML and returns the first resulting node.
  def element_from_xml(xml, options = {})
    elements_from_xml(xml, options).first
  end

  # Parses the given XML and returns the nodes of the resulting document.
  def elements_from_xml(xml, options = {})
    Transformator.document_from_xml(xml, options).nodes
  end

  # Returns the first match of +path+ below +node+. If a block is given and
  # something was found, the match is yielded and the block's result returned.
  def find(node, path, &block)
    find_result = find_all(node, path).first

    if block && find_result
      yield(find_result)
    else
      find_result
    end
  end

  # Returns all matches of +path+ below +node+. If a block is given it is
  # called with the matches, but only when there is at least one.
  def find_all(node, path, &block)
    find_all_result = node.locate(Transformator.oxify_path(path))

    yield(find_all_result) if block && !find_all_result.empty?
    find_all_result
  end

  # Serializes the given element to an XML string (called with +with_xml: false+).
  def xml_from_element(element)
    Transformator.xml_from_document(element, with_xml: false)
  end
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
module Transformator::FormatConverter
|
2
|
-
require_relative "./format_converter/document_from_hash"
|
3
|
-
require_relative "./format_converter/document_from_object"
|
4
|
-
require_relative "./format_converter/document_from_xml"
|
5
|
-
require_relative "./format_converter/hash_from_document"
|
6
|
-
require_relative "./format_converter/xml_from_document"
|
7
|
-
|
8
|
-
include DocumentFromHash
|
9
|
-
include DocumentFromObject
|
10
|
-
include DocumentFromXml
|
11
|
-
include HashFromDocument
|
12
|
-
include XmlFromDocument
|
13
|
-
|
14
|
-
# Strips namespace prefixes from opening and closing tags in place, e.g.
# "<env:Body>" becomes "<Body>". Attributes such as xmlns declarations are
# left untouched.
#
# @param xml [String] the XML string to modify (mutated)
# @return [String] the same string; it is returned even when nothing had to
#   be replaced, although String#gsub! itself returns nil in that case
def self.remove_namespaces!(xml)
  xml.gsub!(/<(\/?)\w+:(\w+)/, "<\\1\\2")
  xml
end
|
17
|
-
|
18
|
-
# Non-mutating variant: runs the in-place removal on a duplicate of +xml+ and
# returns that duplicate, leaving the argument unchanged.
def self.remove_whitespace_only_text_nodes(xml)
  copy = xml.dup
  remove_whitespace_only_text_nodes!(copy)
end
|
21
|
-
|
22
|
-
# Removes whitespace-only runs between a closing ">" and the next "<" in place.
#
# @param xml [String] the XML string to modify (mutated)
# @return [String] the same string
def self.remove_whitespace_only_text_nodes!(xml)
  # \s already covers \n and \r, so no alternation or capture group is needed
  xml.gsub!(/>\s+</, "><")
  xml
end
|
27
|
-
end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
require "active_support/core_ext/hash/conversions"
|
2
|
-
require "libxml"
|
3
|
-
require "transformator/format_converter/document_from_xml"
|
4
|
-
|
5
|
-
# Converts Ruby hashes into documents by serializing them to XML first.
module Transformator::FormatConverter::DocumentFromHash
  include Transformator::FormatConverter::DocumentFromXml

  # Serializes +hash+ with ActiveSupport's Hash#to_xml (root element "hash",
  # type attributes kept) and parses the result into a document.
  #
  # Note: sets the global ActiveSupport::XmlMini backend to "LibXML" on every call.
  def document_from_hash(hash)
    ActiveSupport::XmlMini.backend = "LibXML"

    serialized = hash.to_xml(root: :hash, dasherize: false, skip_types: false, indent: 0)
    document_from_xml(serialized, remove_whitespace_only_text_nodes: false)
  end
end
|
@@ -1,20 +0,0 @@
|
|
1
|
-
# Converts an arbitrary supported input object into a document.
module Transformator::FormatConverter::DocumentFromObject
  # Dispatches on the format reported by Transformator.determine_format:
  # hashes, JSON and XML are converted, an Ox document is returned as is and
  # a nil format yields a new empty document. Any other format results in nil.
  #
  # @param obj the object to convert
  # @param options [Hash] accepted but not used by this method
  def document_from_object(obj, options = {})
    format = Transformator.determine_format(obj)

    case format
    when :ox_document then obj
    when :hash then Transformator.document_from_hash(obj)
    when :json then Transformator.document_from_json(obj)
    when :xml
      Transformator.document_from_xml(
        obj,
        remove_namespaces: true,
        remove_whitespace_only_text_nodes: false
      )
    when nil
      Ox::Document.new(version: "1.0", encoding: "UTF-8")
    end
  end
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
module Transformator::FormatConverter::DocumentFromXml
|
2
|
-
# Parses an XML string into a document.
#
# @param xml [String] the XML to parse; never modified by this method
# @param options [Hash]
#   +:remove_whitespace_only_text_nodes+ - whitespace between tags is removed
#   unless this is explicitly +false+;
#   +:remove_namespaces+ - namespace prefixes are stripped from tags only if
#   this is exactly +true+
# @return the result of Ox.parse if the XML starts with an XML declaration,
#   otherwise a new Ox::Document holding the parsed top-level nodes
def document_from_xml(xml, options = {})
  strip_whitespace = options[:remove_whitespace_only_text_nodes] != false
  strip_namespaces = options[:remove_namespaces] == true

  # Both clean-up steps mutate the string in place, so work on a copy whenever
  # one of them is going to run. This includes the default case, where the
  # whitespace step runs although no option was passed.
  xml = xml.dup if strip_whitespace || strip_namespaces

  Transformator::FormatConverter.remove_whitespace_only_text_nodes!(xml) if strip_whitespace
  Transformator::FormatConverter.remove_namespaces!(xml) if strip_namespaces

  if xml[/\A\s*<\?xml/]
    Ox.parse(xml)
  else
    # fragments may have several top-level nodes: parse them inside a temporary
    # root element and move the resulting nodes into a new document
    Ox::Document.new(version: "1.0", encoding: "UTF-8").tap do |new_document|
      Ox.parse("<root>#{xml}</root>").nodes.each do |node|
        new_document << node
      end
    end
  end
end
|
28
|
-
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
require "ox"
|
2
|
-
|
3
|
-
# Converts documents back into plain Ruby hashes.
module Transformator::FormatConverter::HashFromDocument
  # Converts +document+ into a hash. If the root element is not named "hash",
  # all nodes of the document are first moved below a new "hash" root element.
  # The content of the top-level "hash" entry is returned when it is not nil,
  # otherwise the whole result hash.
  def hash_from_document(document)
    unless document.root.value == "hash"
      container = Ox::Document.new
      hash_root = Ox::Element.new("hash")
      container << hash_root
      document.nodes.each { |node| hash_root << node }

      document = container
    end

    result = Transformator::FormatConverter::HashFromDocument.process_node(document.root, {})
    unwrapped = result["hash"]
    unwrapped.nil? ? result : unwrapped
  end

  # Converts +node+ into a Ruby value and stores it in +hash+ under the node's
  # name; returns +hash+.
  #
  # * nodes with only string children are converted according to their "type"
  #   attribute ("integer", "float", "boolean"); without a matching type they
  #   become nil if the "nil" attribute is "true", else the joined text
  # * nodes with type "array" become an array built from their equally named
  #   child elements
  # * all other nodes become a hash of their children
  #
  # If +hash+ already holds a value for the name, the values are collected in
  # an array (an existing array is appended to).
  def self.process_node(node, hash)
    key = node.value
    child_nodes = node.nodes

    value =
      if child_nodes.all? { |child_node| child_node.is_a?(String) }
        text = child_nodes.join

        case node[:type]
        when "integer" then text.to_i
        when "float" then text.to_f
        when "boolean" then text.downcase == "true"
        else
          node[:nil] == "true" ? nil : text
        end
      elsif node[:type] == "array"
        node.locate(key).map do |item_node|
          item = process_node(item_node, {})

          # unwrap the { name => value } hash produced for a single item
          item.is_a?(Hash) && item.keys == [key] ? item.values.first : item
        end
      else
        child_nodes.each_with_object({}) do |child_node, children|
          process_node(child_node, children)
        end
      end

    existing = hash[key]

    hash[key] =
      if existing.nil?
        value
      elsif existing.is_a?(Array)
        existing << value
      else
        [existing, value]
      end

    hash
  end
end
|