datanorm 0.0.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +24 -2
- data/lib/datanorm/document.rb +3 -3
- data/lib/datanorm/documents/assemble.rb +7 -1
- data/lib/datanorm/documents/assembles/product.rb +33 -27
- data/lib/datanorm/documents/assembles/reference.rb +37 -0
- data/lib/datanorm/documents/preprocess.rb +4 -1
- data/lib/datanorm.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9bc247f6b5cf0caa87be3451f4d2e90a72aba5ad997a00366d48d36f02b2b53d
|
4
|
+
data.tar.gz: 2f37db8b1e291a729c848a9fb6c2c5f8b1ec18985e2c6bc5f407a7ac06915c58
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 06e36c422c1067c51217608f4c59b7112ae0342a4b3fae8d3f19e820c89be79363698c2e8ab39f4749243b9822739a4d752dbf785e080f45402ae44121682a07
|
7
|
+
data.tar.gz: 51b569323216339000c189c7c17744e403a8706ffb80095a3aec702413740d25f8f823477b5214b703779884893247f22c24d1a2fed9d3d45f6fb7e6df608f99
|
data/README.md
CHANGED
@@ -94,7 +94,27 @@ I went for a parsing mechanism that works every time, with every file, at the ex
|
|
94
94
|
|
95
95
|
If you have a `DATANORM.001` and also a `DATPREIS.001`, you must concatenate those two files into one file first (their versions need to be the same). The resulting, merged file is what you provide to this Rubygem.
|
96
96
|
|
97
|
-
|
97
|
+
### Quick Usage
|
98
|
+
|
99
|
+
If you want one product at a time (without having to deal with the complexities of Datanorm), you can use this:
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
document = Datanorm::Document.new(path: 'datanorm.001')
|
103
|
+
|
104
|
+
puts document.header
|
105
|
+
puts document.version
|
106
|
+
|
107
|
+
document.each do |product|
|
108
|
+
puts product.title
|
109
|
+
puts product.to_json
|
110
|
+
end
|
111
|
+
```
|
112
|
+
|
113
|
+
Note that it can take a long time until the first product is yielded, because preprocessing has to complete first.
|
114
|
+
|
115
|
+
### Usage with Progress
|
116
|
+
|
117
|
+
If you want to see the progress, you can use the following:
|
98
118
|
|
99
119
|
```ruby
|
100
120
|
document = Datanorm::Document.new(path: 'datanorm.001')
|
@@ -102,7 +122,7 @@ document = Datanorm::Document.new(path: 'datanorm.001')
|
|
102
122
|
puts document.header
|
103
123
|
puts document.version
|
104
124
|
|
105
|
-
document.each do |product, progress|
|
125
|
+
document.each(yield_progress: true) do |product, progress|
|
106
126
|
# Once pre-processing is complete, you'll start to get products here
|
107
127
|
puts product # <- can be nil in the beginning
|
108
128
|
|
@@ -111,6 +131,8 @@ document.each do |product, progress|
|
|
111
131
|
end
|
112
132
|
```
|
113
133
|
|
134
|
+
### Bare Datanorm parsing
|
135
|
+
|
114
136
|
If you only want to read the raw Datanorm file one line at a time as Ruby objects, you can use this:
|
115
137
|
|
116
138
|
```ruby
|
data/lib/datanorm/document.rb
CHANGED
@@ -28,13 +28,13 @@ module Datanorm
|
|
28
28
|
file.version
|
29
29
|
end
|
30
30
|
|
31
|
-
def each(&)
|
31
|
+
def each(yield_progress: false, &)
|
32
32
|
unless @preprocessed
|
33
|
-
::Datanorm::Documents::Preprocess.call(file:, workdir:, &)
|
33
|
+
::Datanorm::Documents::Preprocess.call(file:, workdir:, yield_progress:, &)
|
34
34
|
@preprocessed = true
|
35
35
|
end
|
36
36
|
|
37
|
-
::Datanorm::Documents::Assemble.call(workdir:, &)
|
37
|
+
::Datanorm::Documents::Assemble.call(workdir:, yield_progress:, &)
|
38
38
|
ensure
|
39
39
|
# At this point all yields have gone through and we can clean up.
|
40
40
|
workdir.rmtree unless ENV['DEBUG_DATANORM']
|
@@ -8,13 +8,19 @@ module Datanorm
|
|
8
8
|
include ::Datanorm::Logging
|
9
9
|
|
10
10
|
option :workdir
|
11
|
+
option :yield_progress, default: -> { false }
|
11
12
|
|
12
13
|
def call
|
13
14
|
return unless products_file.file?
|
14
15
|
|
15
16
|
::File.foreach(products_file) do |json|
|
16
17
|
progress.increment!
|
17
|
-
|
18
|
+
|
19
|
+
if yield_progress
|
20
|
+
yield ::Datanorm::Documents::Assembles::Product.new(json:, workdir:), progress
|
21
|
+
else
|
22
|
+
yield ::Datanorm::Documents::Assembles::Product.new(json:, workdir:)
|
23
|
+
end
|
18
24
|
end
|
19
25
|
end
|
20
26
|
|
@@ -53,7 +53,7 @@ module Datanorm
|
|
53
53
|
# Instead, we choose one or the other.
|
54
54
|
return dimension_content if dimension_content && !dimension_content.strip.empty?
|
55
55
|
|
56
|
-
|
56
|
+
text_reference.read
|
57
57
|
end
|
58
58
|
|
59
59
|
# -----------------------
|
@@ -84,29 +84,43 @@ module Datanorm
|
|
84
84
|
def prices
|
85
85
|
return @prices if defined?(@prices)
|
86
86
|
|
87
|
-
@prices =
|
87
|
+
@prices = price_reference.read&.map do |json|
|
88
88
|
::Datanorm::Documents::Assembles::Price.new(json:)
|
89
89
|
end || []
|
90
90
|
end
|
91
91
|
|
92
|
+
# ----------------------
|
93
|
+
# Calculated Final Price
|
94
|
+
# ----------------------
|
95
|
+
|
96
|
+
# The cheapest of all prices is probably what we pay.
|
97
|
+
def cheapest_price
|
98
|
+
[price, *prices.map(&:price_after_discount)].min
|
99
|
+
end
|
100
|
+
|
101
|
+
# The most expensive of all prices is probably what we sell for.
|
102
|
+
def most_expensive_price
|
103
|
+
[price, *prices.map(&:price)].max
|
104
|
+
end
|
105
|
+
|
92
106
|
# -----------------
|
93
107
|
# Referenced Extras
|
94
108
|
# -----------------
|
95
109
|
|
96
110
|
def matchcode
|
97
|
-
|
111
|
+
extra_reference.read[:matchcode]
|
98
112
|
end
|
99
113
|
|
100
114
|
def alternative_id
|
101
|
-
|
115
|
+
extra_reference.read[:alternative_id]
|
102
116
|
end
|
103
117
|
|
104
118
|
def ean
|
105
|
-
|
119
|
+
extra_reference.read[:ean]
|
106
120
|
end
|
107
121
|
|
108
122
|
def category_id
|
109
|
-
|
123
|
+
extra_reference.read[:category_id]
|
110
124
|
end
|
111
125
|
|
112
126
|
# -------
|
@@ -119,7 +133,7 @@ module Datanorm
|
|
119
133
|
|
120
134
|
def as_json
|
121
135
|
# Adding referenced attributes that were cached to disk during preprocessing.
|
122
|
-
json.merge(description:, prices: prices.map(&:as_json)).merge(
|
136
|
+
json.merge(description:, prices: prices.map(&:as_json)).merge(extra_reference.read)
|
123
137
|
end
|
124
138
|
|
125
139
|
def to_json(...)
|
@@ -143,32 +157,24 @@ module Datanorm
|
|
143
157
|
end
|
144
158
|
end
|
145
159
|
|
146
|
-
def
|
160
|
+
def text_reference
|
147
161
|
return unless text_id
|
148
|
-
return @text_content if defined?(@text_content)
|
149
162
|
|
150
|
-
@
|
151
|
-
path
|
152
|
-
|
153
|
-
end
|
163
|
+
@text_reference ||= ::Datanorm::Documents::Assembles::Reference.new(
|
164
|
+
path: workdir.join('T', ::Datanorm::Helpers::Filename.call(text_id))
|
165
|
+
)
|
154
166
|
end
|
155
167
|
|
156
|
-
def
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
path = workdir.join('B', ::Datanorm::Helpers::Filename.call(id))
|
161
|
-
JSON.parse(path.read, symbolize_names: true) if path.file?
|
162
|
-
end
|
168
|
+
def extra_reference
|
169
|
+
@extra_reference ||= ::Datanorm::Documents::Assembles::Reference.new(
|
170
|
+
path: workdir.join('B', ::Datanorm::Helpers::Filename.call(id)), parse_json: true
|
171
|
+
)
|
163
172
|
end
|
164
173
|
|
165
|
-
def
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
path = workdir.join('P', ::Datanorm::Helpers::Filename.call(id))
|
170
|
-
path.read if path.file?
|
171
|
-
end
|
174
|
+
def price_reference
|
175
|
+
@price_reference ||= ::Datanorm::Documents::Assembles::Reference.new(
|
176
|
+
path: workdir.join('P', ::Datanorm::Helpers::Filename.call(id)), split_newlines: true
|
177
|
+
)
|
172
178
|
end
|
173
179
|
end
|
174
180
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Datanorm
|
4
|
+
module Documents
|
5
|
+
module Assembles
|
6
|
+
# Convenience helper to read the contents of a file.
|
7
|
+
class Reference
|
8
|
+
extend Dry::Initializer
|
9
|
+
|
10
|
+
option :path
|
11
|
+
option :parse_json, optional: true
|
12
|
+
option :split_newlines, optional: true
|
13
|
+
|
14
|
+
def read
|
15
|
+
return @read if defined?(@read)
|
16
|
+
|
17
|
+
@read = read!
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def read!
|
23
|
+
if parse_json
|
24
|
+
JSON.parse(path.read, symbolize_names: true) if path.file?
|
25
|
+
|
26
|
+
elsif path.file?
|
27
|
+
if split_newlines
|
28
|
+
path.read.split("\n")
|
29
|
+
else
|
30
|
+
path.read
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -10,6 +10,7 @@ module Datanorm
|
|
10
10
|
|
11
11
|
option :file
|
12
12
|
option :workdir
|
13
|
+
option :yield_progress, default: -> { false }
|
13
14
|
|
14
15
|
def call
|
15
16
|
FileUtils.mkdir_p(workdir)
|
@@ -18,7 +19,9 @@ module Datanorm
|
|
18
19
|
::Datanorm::Documents::Preprocesses::Process.call(workdir:, record:)
|
19
20
|
|
20
21
|
progress.increment!
|
21
|
-
|
22
|
+
if yield_progress
|
23
|
+
yield nil, progress # No items to yield during preprocess.
|
24
|
+
end
|
22
25
|
end
|
23
26
|
end
|
24
27
|
|
data/lib/datanorm.rb
CHANGED
@@ -41,6 +41,7 @@ require 'datanorm/documents/preprocesses/cache'
|
|
41
41
|
require 'datanorm/documents/preprocesses/process'
|
42
42
|
require 'datanorm/documents/assembles/product'
|
43
43
|
require 'datanorm/documents/assembles/price'
|
44
|
+
require 'datanorm/documents/assembles/reference'
|
44
45
|
require 'datanorm/documents/assemble'
|
45
46
|
require 'datanorm/progress'
|
46
47
|
require 'datanorm/document'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datanorm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- halo
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- lib/datanorm/documents/assemble.rb
|
104
104
|
- lib/datanorm/documents/assembles/price.rb
|
105
105
|
- lib/datanorm/documents/assembles/product.rb
|
106
|
+
- lib/datanorm/documents/assembles/reference.rb
|
106
107
|
- lib/datanorm/documents/preprocess.rb
|
107
108
|
- lib/datanorm/documents/preprocesses/cache.rb
|
108
109
|
- lib/datanorm/documents/preprocesses/process.rb
|