ytj_client 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 455bb7b28b6be0a244f82d8be90a5a6d1dffa0d7
4
- data.tar.gz: f268be3fd7806007f44600fb2d9e3fbb9fb4ab15
3
+ metadata.gz: 47a276c1b5853c8a646a66cb67b4678615461fe4
4
+ data.tar.gz: ab7257eef607daca529f9639c68bc0634632b18b
5
5
  SHA512:
6
- metadata.gz: 26734252f07e86bc96bc022b5b70f0b39d5186e245a6cd5d79e1a9fbaccd5355635c71e082f24fef18526174592f0c4506de40cd37d7e90412861605401a7569
7
- data.tar.gz: bcacb0411072814d10fbea61cb6c8c6894a9ea2c84bccddb900a8f71822c06ee786ed27da71a1370ce675abb9b4495eca95b5b42467520e3fae44cd370a02393
6
+ metadata.gz: 42702fb6d17475bb9d16a077bde5dd652c506313ff58986b2fee390e1f5f74bb4cf97fe503fd94bd9b8ff58ee6f949a3687e0228f52cac4e01fae0482691765f
7
+ data.tar.gz: 7b092f3f0e14f4dc41b6cdeaf815063d8a8abe17f5164841c19e3a90188fad6623fe214d7e6d42e9f0b4d5cb6cc569c274425c2ad766d8bc3ff1e4d8d5645fd6
data/README.md CHANGED
@@ -54,6 +54,12 @@ YtjClient.fetch_company('2331972-7')
54
54
  ```
55
55
  ## Version history
56
56
 
57
+ ### 0.2.3
58
+
59
+ - Fixes to fetching all the companies:
60
+ - fetch one year and 1000 companies at a time
61
+ - save to csv right away
62
+
57
63
  ### 0.2.2
58
64
 
59
65
  - fetch all Finnish companies from TR api
@@ -1,3 +1,3 @@
1
1
  module YtjClient
2
- VERSION = "0.2.2"
2
+ VERSION = "0.2.3"
3
3
  end
data/lib/ytj_client.rb CHANGED
@@ -8,11 +8,15 @@ require 'active_support/core_ext/hash/slice'
8
8
  require 'active_support/core_ext/hash/keys'
9
9
  require 'active_support/core_ext/string/inflections'
10
10
 
11
+ require 'json'
12
+ require 'csv'
13
+
11
14
  module YtjClient
12
15
 
13
16
  YTJ_API_URL = 'http://avoindata.prh.fi:80/bis/v1/'.freeze
14
- TR_API_URL = 'http://avoindata.prh.fi:80/tr/v1?totalResults=false&maxResults=1000&resultsFrom=0&companyRegistrationFrom=
17
+ TR_API_URL = 'http://avoindata.prh.fi:80/tr/v1?totalResults=false&maxResults=1000&resultsFrom=0
15
18
  '.freeze
19
+ START_YEAR = 1896
16
20
 
17
21
  class << self
18
22
 
@@ -36,27 +40,62 @@ module YtjClient
36
40
  logger.error "Error fetching data from YTJ: #{$!.message} - #{$!.backtrace}"
37
41
  end
38
42
 
39
# Fetches all companies from the TR API one registration year at a time,
# starting from the current year and walking back to START_YEAR. Saving is
# delegated to fetch_year, which receives +format+ unchanged.
#
# @param format [String] output format handed on to fetch_year (default 'csv')
# @return [Integer] total number of companies fetched; when an unexpected
#   error interrupts the run, the total gathered up to that point
def fetch_all_companies(format = 'csv')
  overall_fetched_companies = 0

  Time.now.year.downto(START_YEAR) do |year|
    fetched = fetch_year(year, format)
    # fetch_year can hand back a non-numeric value after it has rescued an
    # error; adding that would raise TypeError and abort every remaining
    # year, so only real counts are accumulated.
    overall_fetched_companies += fetched if fetched.is_a?(Integer)
  end

  logger.info "Fetched #{overall_fetched_companies} companies and saved in #{format}"
  overall_fetched_companies
rescue => e
  logger.error "Error fetching data from TR API: #{e.message} - #{e.backtrace}"
  overall_fetched_companies
end
52
56
 
53
57
  private
54
58
 
59
# Fetches every company registered during +year+ from the TR API, page by
# page, passing each page to save_companies as soon as it arrives.
#
# @param year [Integer] registration year; the query covers 1 Jan to 31 Dec
# @param format [String] output format handed on to save_companies
# @return [Integer] number of companies fetched for the year; if a request
#   or save fails part-way, the count gathered before the failure
def fetch_year(year, format)
  fetched_companies = 0
  # strip guards against stray whitespace at the edges of the base URL,
  # which would otherwise land in the middle of the request URL.
  url = "#{TR_API_URL.strip}&companyRegistrationFrom=#{year}-01-01&companyRegistrationTo=#{year}-12-31"

  loop do
    companies, url = fetch_1000_companies(url)
    companies ||= [] # a response without "results" counts as an empty page
    logger.info "Fetched #{companies.size} companies."
    logger.info "Next URL: #{url}"
    save_companies(companies, format)
    fetched_companies += companies.size

    # Same outcome as blank? for nil and String, without needing ActiveSupport.
    if url.to_s.strip.empty?
      logger.info "No more companies to get for year #{year}. Last response was #{companies.size} companies: #{companies}"
      break
    end

    logger.info "Got #{fetched_companies} companies now, fetching some more"
    sleep 5 # pause between consecutive page requests
  end

  logger.info "Got #{fetched_companies} for year #{year}. Moving on."
  fetched_companies
rescue => e
  logger.error "Error fetching data for year #{year} from TR API: #{e.message} - #{e.backtrace}"
  # Always return a count so the caller can keep summing over the years.
  fetched_companies
end
80
+
55
81
  def fetch_1000_companies(url)
56
82
  response = JSON.parse(RestClient.get(url).body)
57
83
  return response["results"], response["nextResultsUri"]
58
84
  end
59
85
 
86
# Appends one page of companies to the output in the requested format.
#
# For 'csv', each company becomes one row holding its "businessId",
# "companyForm", "name" and "registrationDate" values, in that order. The
# file is opened in append mode, so repeated calls accumulate rows.
#
# @param companies [Array<Hash>, nil] company records with string keys;
#   nil is treated as an empty page
# @param format [String] only 'csv' is supported; any other value is
#   logged and nothing is written
# @param path [String] file to append to (default "companies.csv",
#   resolved against the current working directory)
def save_companies(companies, format, path = "companies.csv")
  case format
  when 'csv'
    CSV.open(path, "ab") do |csv|
      (companies || []).each do |company|
        csv << company.values_at("businessId", "companyForm", "name", "registrationDate")
      end
    end
  else
    logger.info "Unknown save format: #{format.inspect}"
  end
end
98
+
60
99
  # Returns a hash with all company data from YTJ
61
100
  def api_call(business_id)
62
101
  url = "#{YTJ_API_URL}#{business_id}"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ytj_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janne Warén
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-27 00:00:00.000000000 Z
11
+ date: 2017-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler