tax_generator 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data/input/.gitignore +1 -1
- data/data/input/destinations.xml +2168 -22
- data/data/input/taxonomy.xml +76 -0
- data/lib/tax_generator/classes/processor.rb +9 -4
- data/lib/tax_generator/classes/taxonomy_node.rb +16 -0
- data/lib/tax_generator/classes/taxonomy_tree.rb +25 -9
- data/lib/tax_generator/version.rb +1 -1
- metadata +1 -1
data/data/input/taxonomy.xml
CHANGED
@@ -75,4 +75,80 @@
|
|
75
75
|
</node>
|
76
76
|
</node>
|
77
77
|
</taxonomy>
|
78
|
+
|
79
|
+
<taxonomy>
|
80
|
+
<taxonomy_name>World2</taxonomy_name>
|
81
|
+
<node atlas_node_id = "3550641" ethyl_content_object_id="82534" geo_id = "355064">
|
82
|
+
<node_name>Africa</node_name>
|
83
|
+
<node atlas_node_id = "3556111" ethyl_content_object_id="3210" geo_id = "355611">
|
84
|
+
<node_name>South Africa</node_name>
|
85
|
+
<node atlas_node_id = "3556121" ethyl_content_object_id="35474" geo_id = "355612">
|
86
|
+
<node_name>Cape Town</node_name>
|
87
|
+
<node atlas_node_id = "3556131" ethyl_content_object_id="" geo_id = "355613">
|
88
|
+
<node_name>Table Mountain National Park</node_name>
|
89
|
+
</node>
|
90
|
+
</node>
|
91
|
+
<node atlas_node_id = "3556141" ethyl_content_object_id="" geo_id = "355614">
|
92
|
+
<node_name>Free State</node_name>
|
93
|
+
<node atlas_node_id = "3556151" ethyl_content_object_id="1000550692" geo_id = "355615">
|
94
|
+
<node_name>Bloemfontein</node_name>
|
95
|
+
</node>
|
96
|
+
</node>
|
97
|
+
<node atlas_node_id = "3556161" ethyl_content_object_id="" geo_id = "355616">
|
98
|
+
<node_name>Gauteng</node_name>
|
99
|
+
<node atlas_node_id = "3556171" ethyl_content_object_id="37710" geo_id = "355617">
|
100
|
+
<node_name>Johannesburg</node_name>
|
101
|
+
</node>
|
102
|
+
<node atlas_node_id = "3556181" ethyl_content_object_id="1000548256" geo_id = "355618">
|
103
|
+
<node_name>Pretoria</node_name>
|
104
|
+
</node>
|
105
|
+
</node>
|
106
|
+
<node atlas_node_id = "3556191" ethyl_content_object_id="" geo_id = "355619">
|
107
|
+
<node_name>KwaZulu-Natal</node_name>
|
108
|
+
<node atlas_node_id = "3556201" ethyl_content_object_id="43725" geo_id = "355620">
|
109
|
+
<node_name>Durban</node_name>
|
110
|
+
</node>
|
111
|
+
<node atlas_node_id = "3556211" ethyl_content_object_id="1000576780" geo_id = "355621">
|
112
|
+
<node_name>Pietermaritzburg</node_name>
|
113
|
+
</node>
|
114
|
+
</node>
|
115
|
+
<node atlas_node_id = "3556221" ethyl_content_object_id="" geo_id = "355622">
|
116
|
+
<node_name>Mpumalanga</node_name>
|
117
|
+
<node atlas_node_id = "3556231" ethyl_content_object_id="67561" geo_id = "355623">
|
118
|
+
<node_name>Kruger National Park</node_name>
|
119
|
+
</node>
|
120
|
+
</node>
|
121
|
+
<node atlas_node_id = "3556241" ethyl_content_object_id="" geo_id = "355624">
|
122
|
+
<node_name>The Drakensberg</node_name>
|
123
|
+
<node atlas_node_id = "3556251" ethyl_content_object_id="" geo_id = "355625">
|
124
|
+
<node_name>Royal Natal National Park</node_name>
|
125
|
+
</node>
|
126
|
+
</node>
|
127
|
+
<node atlas_node_id = "3556261" ethyl_content_object_id="" geo_id = "355626">
|
128
|
+
<node_name>The Garden Route</node_name>
|
129
|
+
<node atlas_node_id = "3556271" ethyl_content_object_id="" geo_id = "355627">
|
130
|
+
<node_name>Oudtshoorn</node_name>
|
131
|
+
</node>
|
132
|
+
<node atlas_node_id = "3556281" ethyl_content_object_id="" geo_id = "355628">
|
133
|
+
<node_name>Tsitsikamma Coastal National Park</node_name>
|
134
|
+
</node>
|
135
|
+
</node>
|
136
|
+
</node>
|
137
|
+
<node atlas_node_id = "3556291" ethyl_content_object_id="3263" geo_id = "355629">
|
138
|
+
<node_name>Sudan</node_name>
|
139
|
+
<node atlas_node_id = "3556301" ethyl_content_object_id="" geo_id = "355630">
|
140
|
+
<node_name>Eastern Sudan</node_name>
|
141
|
+
<node atlas_node_id = "3556311" ethyl_content_object_id="" geo_id = "355631">
|
142
|
+
<node_name>Port Sudan</node_name>
|
143
|
+
</node>
|
144
|
+
</node>
|
145
|
+
<node atlas_node_id = "3556321" ethyl_content_object_id="" geo_id = "355632">
|
146
|
+
<node_name>Khartoum</node_name>
|
147
|
+
</node>
|
148
|
+
</node>
|
149
|
+
<node atlas_node_id = "3556331" ethyl_content_object_id="3272" geo_id = "355633">
|
150
|
+
<node_name>Swaziland</node_name>
|
151
|
+
</node>
|
152
|
+
</node>
|
153
|
+
</taxonomy>
|
78
154
|
</taxonomies>
|
@@ -178,11 +178,16 @@ module TaxGenerator
|
|
178
178
|
#
|
179
179
|
# @api public
|
180
180
|
def fetch_file_jobs
|
181
|
-
jobs = [
|
182
|
-
|
183
|
-
|
184
|
-
|
181
|
+
jobs = []
|
182
|
+
count = 0
|
183
|
+
@taxonomy.document.xpath('.//taxonomy').pmap do |taxonomy_node|
|
184
|
+
count += 1
|
185
|
+
jobs << { atlas_id: count, taxonomy: @taxonomy, destination: nil, output_folder: output_folder }
|
185
186
|
end
|
187
|
+
nokogiri_xml(destinations_file_path).xpath('//destination').pmap do |destination|
|
188
|
+
atlas_id = destination.attributes['atlas_id']
|
189
|
+
jobs << { atlas_id: atlas_id.value, taxonomy: @taxonomy, destination: destination, output_folder: output_folder }
|
190
|
+
end
|
186
191
|
jobs
|
187
192
|
end
|
188
193
|
|
@@ -1,6 +1,15 @@
|
|
1
1
|
module TaxGenerator
|
2
2
|
# node from the tree
|
3
3
|
class TaxonomyNode < Tree::TreeNode
|
4
|
+
# prints the entire tree with name and content
|
5
|
+
#
|
6
|
+
# @param [Integer] level the level of the current node, Default 0
|
7
|
+
# @param [Integer] max_depth the maximum depth to which the tree is printed. Default nil
|
8
|
+
# @param [Lambda] block the lambda that will be executed for printing node name and content
|
9
|
+
#
|
10
|
+
# @return [String]
|
11
|
+
#
|
12
|
+
# @api public
|
4
13
|
def print_tree(level = 0, max_depth = nil, block = ->(node, prefix) { puts "#{prefix} #{node.respond_to?(:name) ? node.name : node}" })
|
5
14
|
prefix = fetch_prefix_for_printing(level)
|
6
15
|
block.call("#{name}---#{content}", prefix)
|
@@ -9,6 +18,13 @@ module TaxGenerator
|
|
9
18
|
children { |child| child.print_tree(level + 1, max_depth, block) if child } # Child might be 'nil'
|
10
19
|
end
|
11
20
|
|
21
|
+
# builds up the prefix needed to display for current node
|
22
|
+
#
|
23
|
+
# @param [Integer] level the level of the current node
|
24
|
+
#
|
25
|
+
# @return [String]
|
26
|
+
#
|
27
|
+
# @api public
|
12
28
|
def fetch_prefix_for_printing(level)
|
13
29
|
prefix = ''
|
14
30
|
if is_root?
|
@@ -13,7 +13,7 @@ module TaxGenerator
|
|
13
13
|
|
14
14
|
# receives a file path that will be parsed and used to build the tree
|
15
15
|
# @see Tree::TreeNode#new
|
16
|
-
# @see #
|
16
|
+
# @see #find_taxonomies
|
17
17
|
#
|
18
18
|
# @param [String] file_path the path to the xml file that will be parsed and used to build the tree
|
19
19
|
#
|
@@ -22,10 +22,25 @@ module TaxGenerator
|
|
22
22
|
# @api public
|
23
23
|
def initialize(file_path)
|
24
24
|
@document = nokogiri_xml(file_path)
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
@root_node = TaxGenerator::TaxonomyNode.new('ROOT', 'ROOT')
|
26
|
+
find_taxonomies
|
27
|
+
end
|
28
|
+
|
29
|
+
# searches all the taxonomy elements in the document and adds them as top level nodes
|
30
|
+
# and then calls method add_node to search inside their children
|
31
|
+
# @see #insert_node
|
32
|
+
# @see #add_node
|
33
|
+
#
|
34
|
+
# @return [void]
|
35
|
+
#
|
36
|
+
# @api public
|
37
|
+
def find_taxonomies
|
38
|
+
count = 0
|
39
|
+
@document.xpath('.//taxonomy').pmap do |taxonomy_node|
|
40
|
+
count += 1
|
41
|
+
taxonomy_name = taxonomy_node.at_xpath('.//taxonomy_name')
|
42
|
+
tax_node = insert_node(count.to_s, taxonomy_name.content, @root_node)
|
43
|
+
add_node(taxonomy_node, tax_node, skip_add: true)
|
29
44
|
end
|
30
45
|
end
|
31
46
|
|
@@ -60,7 +75,8 @@ module TaxGenerator
|
|
60
75
|
def add_taxonomy_node(taxonomy_node, node)
|
61
76
|
atlas_node_id = taxonomy_node.attributes['atlas_node_id']
|
62
77
|
node_name = taxonomy_node.children.find { |child| child.name == 'node_name' }
|
63
|
-
|
78
|
+
return if atlas_node_id.blank? || node_name.blank?
|
79
|
+
insert_node(atlas_node_id.value, node_name.content, node)
|
64
80
|
end
|
65
81
|
|
66
82
|
# inserts a new node in the tree by checking first if atlas_id and node_name are present
|
@@ -76,7 +92,7 @@ module TaxGenerator
|
|
76
92
|
# @api public
|
77
93
|
def insert_node(atlas_node_id, node_name, node)
|
78
94
|
return if atlas_node_id.blank? || node_name.blank?
|
79
|
-
current_node = TaxGenerator::TaxonomyNode.new(atlas_node_id
|
95
|
+
current_node = TaxGenerator::TaxonomyNode.new(atlas_node_id, node_name)
|
80
96
|
node << current_node
|
81
97
|
current_node
|
82
98
|
end
|
@@ -91,8 +107,8 @@ module TaxGenerator
|
|
91
107
|
# @return [void]
|
92
108
|
#
|
93
109
|
# @api public
|
94
|
-
def add_node(taxonomy_node, node)
|
95
|
-
tax_node = add_taxonomy_node(taxonomy_node, node)
|
110
|
+
def add_node(taxonomy_node, node, options = {})
|
111
|
+
tax_node = options[:skip_add].present? ? node : add_taxonomy_node(taxonomy_node, node)
|
96
112
|
return unless taxonomy_node.children.any?
|
97
113
|
taxonomy_node.xpath('./node').pmap do |child_node|
|
98
114
|
add_node(child_node, tax_node) if tax_node.present?
|