blz 0.2.0.20181203 → 0.2.0.20190603
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile +1 -0
- data/blz.gemspec +1 -1
- data/data/2019_06_03.tsv.gz +0 -0
- data/scripts/fetch.rb +25 -15
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b10c9a3ddad85875050f3f6aed75714a09692a95
|
4
|
+
data.tar.gz: 971d1fa6f38e48ec43fc90a7571f8f473383ef45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14a2e5ffcbddf0c97d0431517a82effa37e9bc824be630cfb84f23ee82b1459b4c08ff9aac740a529a4327afd3c5f007259c2f892b6fdc95a44d3ad8e0486b78
|
7
|
+
data.tar.gz: 5cb61246b47a9903f788b175da47233474e9e2ac82f025098cd1aadc93e72817d743e271787a266e9e678164607a0f393fd12c0436a82175b704d9e2c82e7b9e
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/blz.gemspec
CHANGED
Binary file
|
data/scripts/fetch.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require "mechanize"
|
4
4
|
require "logger"
|
5
5
|
require "zlib"
|
6
|
+
require "zip"
|
6
7
|
|
7
8
|
if (f = File.expand_path("~/.extranet")) && File.exist?(f)
|
8
9
|
username, password = File.read(f).strip.split(":", 2)
|
@@ -44,9 +45,10 @@ begin
|
|
44
45
|
sleep 1
|
45
46
|
page = agent.get NAVIGATION[:table]
|
46
47
|
|
47
|
-
target_link = page.links
|
48
|
-
!!(link.text.strip =~ /BLZ_\d{8}.
|
49
|
-
|
48
|
+
target_link = page.links
|
49
|
+
.select {|link| !!(link.text.strip =~ /BLZ_\d{8}.zip/) }
|
50
|
+
.sort_by {|link| link.text.strip }
|
51
|
+
.last
|
50
52
|
|
51
53
|
if !target_link
|
52
54
|
logger.info("no new download found")
|
@@ -54,7 +56,7 @@ begin
|
|
54
56
|
end
|
55
57
|
|
56
58
|
name = target_link.text.strip
|
57
|
-
if name == format("BLZ_%4d%02d%02d.
|
59
|
+
if name == format("BLZ_%4d%02d%02d.zip", last_match[:y].to_i, last_match[:m].to_i, last_match[:d].to_i)
|
58
60
|
logger.info("no matching link found")
|
59
61
|
exit 0
|
60
62
|
end
|
@@ -63,21 +65,29 @@ begin
|
|
63
65
|
sleep 1
|
64
66
|
blz = agent.get(target_link.href)
|
65
67
|
|
66
|
-
name_match = name.match(/BLZ_(?<y>\d{4})(?<m>\d\d)(?<d>\d\d)\.
|
68
|
+
name_match = name.match(/BLZ_(?<y>\d{4})(?<m>\d\d)(?<d>\d\d)\.zip$/)
|
67
69
|
target_name = format("../data/%4d_%02d_%02d.tsv.gz", name_match[:y].to_i, name_match[:m].to_i, name_match[:d].to_i)
|
68
70
|
target_file = File.expand_path(target_name, __dir__)
|
69
71
|
|
70
|
-
logger.info("
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
72
|
+
logger.info("extracting ZIP file")
|
73
|
+
Zip::File.open_buffer(blz.body).each do |entry|
|
74
|
+
next unless entry.name == "BLZ.txt"
|
75
|
+
logger.info("found BLZ data")
|
76
|
+
|
77
|
+
entry.get_input_stream do |stream|
|
78
|
+
logger.info("reformatting, saving as #{target_file}")
|
79
|
+
Zlib::GzipWriter.open(target_file, Zlib::BEST_COMPRESSION) do |gz|
|
80
|
+
while line = stream.gets
|
81
|
+
line = line.encode(Encoding::UTF_8, Encoding::ISO_8859_15).chomp!
|
82
|
+
if line.length != METRIC_LEN
|
83
|
+
logger.error("expected line length #{METRIC_LEN}, got #{line.length} in '#{line}'")
|
84
|
+
next
|
85
|
+
end
|
86
|
+
|
87
|
+
i = 0
|
88
|
+
gz.puts METRIC.inject([]) {|s, m| s << line[i ... (i+=m)].strip }.join("\t")
|
89
|
+
end
|
77
90
|
end
|
78
|
-
|
79
|
-
i = 0
|
80
|
-
gz.puts METRIC.inject([]) {|s, m| s << line[i ... (i+=m)].strip }.join("\t")
|
81
91
|
end
|
82
92
|
end
|
83
93
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0.
|
4
|
+
version: 0.2.0.20190603
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Oliver Eilhard
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2019-05-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -87,6 +87,7 @@ files:
|
|
87
87
|
- data/2018_06_04.tsv.gz
|
88
88
|
- data/2018_09_03.tsv.gz
|
89
89
|
- data/2018_12_03.tsv.gz
|
90
|
+
- data/2019_06_03.tsv.gz
|
90
91
|
- lib/blz.rb
|
91
92
|
- lib/blz/bank.rb
|
92
93
|
- scripts/Makefile
|