ytj_client 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/lib/ytj_client/version.rb +1 -1
- data/lib/ytj_client.rb +50 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47a276c1b5853c8a646a66cb67b4678615461fe4
|
4
|
+
data.tar.gz: ab7257eef607daca529f9639c68bc0634632b18b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42702fb6d17475bb9d16a077bde5dd652c506313ff58986b2fee390e1f5f74bb4cf97fe503fd94bd9b8ff58ee6f949a3687e0228f52cac4e01fae0482691765f
|
7
|
+
data.tar.gz: 7b092f3f0e14f4dc41b6cdeaf815063d8a8abe17f5164841c19e3a90188fad6623fe214d7e6d42e9f0b4d5cb6cc569c274425c2ad766d8bc3ff1e4d8d5645fd6
|
data/README.md
CHANGED
@@ -54,6 +54,12 @@ YtjClient.fetch_company('2331972-7')
|
|
54
54
|
```
|
55
55
|
## Version history
|
56
56
|
|
57
|
+
### 0.2.3
|
58
|
+
|
59
|
+
- Fixes to fetching all the companies:
|
60
|
+
- fetch one year and 1000 companies at a time
|
61
|
+
- save to csv right away
|
62
|
+
|
57
63
|
### 0.2.2
|
58
64
|
|
59
65
|
- fetch all Finnish companies from TR api
|
data/lib/ytj_client/version.rb
CHANGED
data/lib/ytj_client.rb
CHANGED
@@ -8,11 +8,15 @@ require 'active_support/core_ext/hash/slice'
|
|
8
8
|
require 'active_support/core_ext/hash/keys'
|
9
9
|
require 'active_support/core_ext/string/inflections'
|
10
10
|
|
11
|
+
require 'json'
|
12
|
+
require 'csv'
|
13
|
+
|
11
14
|
module YtjClient
|
12
15
|
|
13
16
|
YTJ_API_URL = 'http://avoindata.prh.fi:80/bis/v1/'.freeze
|
14
|
-
TR_API_URL = 'http://avoindata.prh.fi:80/tr/v1?totalResults=false&maxResults=1000&resultsFrom=0
|
17
|
+
TR_API_URL = 'http://avoindata.prh.fi:80/tr/v1?totalResults=false&maxResults=1000&resultsFrom=0
|
15
18
|
'.freeze
|
19
|
+
START_YEAR = 1896
|
16
20
|
|
17
21
|
class << self
|
18
22
|
|
@@ -36,27 +40,62 @@ module YtjClient
|
|
36
40
|
logger.error "Error fetching data from YTJ: #{$!.message} - #{$!.backtrace}"
|
37
41
|
end
|
38
42
|
|
39
|
-
def fetch_all_companies(
|
40
|
-
|
41
|
-
|
43
|
+
def fetch_all_companies(format = 'csv')
|
44
|
+
overall_fetched_companies = 0
|
45
|
+
end_year = Time.now.year
|
42
46
|
|
43
|
-
|
44
|
-
|
45
|
-
companies.each do |result|
|
46
|
-
all_companies << result.slice("businessId", "name", "companyForm", "registrationDate").symbolize_keys
|
47
|
-
end
|
48
|
-
break if url.blank?
|
47
|
+
end_year.downto(START_YEAR).to_a.each do |year|
|
48
|
+
overall_fetched_companies += fetch_year(year, format)
|
49
49
|
end
|
50
|
-
|
50
|
+
|
51
|
+
logger.info "Fetched #{overall_fetched_companies} companies and saved in #{format}"
|
52
|
+
return overall_fetched_companies
|
53
|
+
rescue
|
54
|
+
logger.error "Error fetching data from TR API: #{$!.message} - #{$!.backtrace}"
|
51
55
|
end
|
52
56
|
|
53
57
|
private
|
54
58
|
|
59
|
+
def fetch_year(year, format)
|
60
|
+
url = url = TR_API_URL+"&companyRegistrationFrom=#{year}-01-01&companyRegistrationTo=#{year}-12-31"
|
61
|
+
fetched_companies = 0
|
62
|
+
while true
|
63
|
+
companies, url = fetch_1000_companies(url)
|
64
|
+
logger.info "Fetched #{companies.size} companies."
|
65
|
+
logger.info "Next URL: #{url}"
|
66
|
+
save_companies(companies, format)
|
67
|
+
fetched_companies += companies.size
|
68
|
+
if url.blank?
|
69
|
+
logger.info "No more companies to get for year #{year}. Last response was #{companies.size} companies: #{companies}"
|
70
|
+
break
|
71
|
+
end
|
72
|
+
logger.info "Got #{fetched_companies} companies now, fetching some more"
|
73
|
+
sleep 5
|
74
|
+
end
|
75
|
+
logger.info "Got #{fetched_companies} for year #{year}. Moving on."
|
76
|
+
return fetched_companies
|
77
|
+
rescue
|
78
|
+
logger.error "Error fetching data for year #{year} from TR API: #{$!.message} - #{$!.backtrace}"
|
79
|
+
end
|
80
|
+
|
55
81
|
def fetch_1000_companies(url)
|
56
82
|
response = JSON.parse(RestClient.get(url).body)
|
57
83
|
return response["results"], response["nextResultsUri"]
|
58
84
|
end
|
59
85
|
|
86
|
+
def save_companies(companies, format)
|
87
|
+
case format
|
88
|
+
when 'csv'
|
89
|
+
CSV.open("companies.csv", "ab") do |csv|
|
90
|
+
companies.each do |company|
|
91
|
+
csv << [company["businessId"], company["companyForm"], company["name"], company["registrationDate"]]
|
92
|
+
end
|
93
|
+
end
|
94
|
+
else
|
95
|
+
logger.info "Unknown save format"
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
60
99
|
# Returns a hash with all company data from YTJ
|
61
100
|
def api_call(business_id)
|
62
101
|
url = "#{YTJ_API_URL}#{business_id}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ytj_client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janne Warén
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|