ytj_client 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 455bb7b28b6be0a244f82d8be90a5a6d1dffa0d7
4
- data.tar.gz: f268be3fd7806007f44600fb2d9e3fbb9fb4ab15
3
+ metadata.gz: 47a276c1b5853c8a646a66cb67b4678615461fe4
4
+ data.tar.gz: ab7257eef607daca529f9639c68bc0634632b18b
5
5
  SHA512:
6
- metadata.gz: 26734252f07e86bc96bc022b5b70f0b39d5186e245a6cd5d79e1a9fbaccd5355635c71e082f24fef18526174592f0c4506de40cd37d7e90412861605401a7569
7
- data.tar.gz: bcacb0411072814d10fbea61cb6c8c6894a9ea2c84bccddb900a8f71822c06ee786ed27da71a1370ce675abb9b4495eca95b5b42467520e3fae44cd370a02393
6
+ metadata.gz: 42702fb6d17475bb9d16a077bde5dd652c506313ff58986b2fee390e1f5f74bb4cf97fe503fd94bd9b8ff58ee6f949a3687e0228f52cac4e01fae0482691765f
7
+ data.tar.gz: 7b092f3f0e14f4dc41b6cdeaf815063d8a8abe17f5164841c19e3a90188fad6623fe214d7e6d42e9f0b4d5cb6cc569c274425c2ad766d8bc3ff1e4d8d5645fd6
data/README.md CHANGED
@@ -54,6 +54,12 @@ YtjClient.fetch_company('2331972-7')
54
54
  ```
55
55
  ## Version history
56
56
 
57
+ ### 0.2.3
58
+
59
+ - Fixes to fetching all the companies:
60
+ - fetch one year and 1000 companies at a time
61
+ - save to csv right away
62
+
57
63
  ### 0.2.2
58
64
 
59
65
  - fetch all Finnish companies from TR api
data/lib/ytj_client/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module YtjClient
2
- VERSION = "0.2.2"
2
+ VERSION = "0.2.3"
3
3
  end
data/lib/ytj_client.rb CHANGED
@@ -8,11 +8,15 @@ require 'active_support/core_ext/hash/slice'
8
8
  require 'active_support/core_ext/hash/keys'
9
9
  require 'active_support/core_ext/string/inflections'
10
10
 
11
+ require 'json'
12
+ require 'csv'
13
+
11
14
  module YtjClient
12
15
 
13
16
  YTJ_API_URL = 'http://avoindata.prh.fi:80/bis/v1/'.freeze
14
- TR_API_URL = 'http://avoindata.prh.fi:80/tr/v1?totalResults=false&maxResults=1000&resultsFrom=0&companyRegistrationFrom=
17
+ TR_API_URL = 'http://avoindata.prh.fi:80/tr/v1?totalResults=false&maxResults=1000&resultsFrom=0
15
18
  '.freeze
19
+ START_YEAR = 1896
16
20
 
17
21
  class << self
18
22
 
@@ -36,27 +40,62 @@ module YtjClient
36
40
  logger.error "Error fetching data from YTJ: #{$!.message} - #{$!.backtrace}"
37
41
  end
38
42
 
39
- def fetch_all_companies(to_date = '1896-01-01')
40
- all_companies = []
41
- url = TR_API_URL+to_date
43
+ def fetch_all_companies(format = 'csv')
44
+ overall_fetched_companies = 0
45
+ end_year = Time.now.year
42
46
 
43
- while true
44
- companies, url = fetch_1000_companies(url)
45
- companies.each do |result|
46
- all_companies << result.slice("businessId", "name", "companyForm", "registrationDate").symbolize_keys
47
- end
48
- break if url.blank?
47
+ end_year.downto(START_YEAR).to_a.each do |year|
48
+ overall_fetched_companies += fetch_year(year, format)
49
49
  end
50
- return all_companies
50
+
51
+ logger.info "Fetched #{overall_fetched_companies} companies and saved in #{format}"
52
+ return overall_fetched_companies
53
+ rescue
54
+ logger.error "Error fetching data from TR API: #{$!.message} - #{$!.backtrace}"
51
55
  end
52
56
 
53
57
  private
54
58
 
59
+ def fetch_year(year, format)
60
+ url = url = TR_API_URL+"&companyRegistrationFrom=#{year}-01-01&companyRegistrationTo=#{year}-12-31"
61
+ fetched_companies = 0
62
+ while true
63
+ companies, url = fetch_1000_companies(url)
64
+ logger.info "Fetched #{companies.size} companies."
65
+ logger.info "Next URL: #{url}"
66
+ save_companies(companies, format)
67
+ fetched_companies += companies.size
68
+ if url.blank?
69
+ logger.info "No more companies to get for year #{year}. Last response was #{companies.size} companies: #{companies}"
70
+ break
71
+ end
72
+ logger.info "Got #{fetched_companies} companies now, fetching some more"
73
+ sleep 5
74
+ end
75
+ logger.info "Got #{fetched_companies} for year #{year}. Moving on."
76
+ return fetched_companies
77
+ rescue
78
+ logger.error "Error fetching data for year #{year} from TR API: #{$!.message} - #{$!.backtrace}"
79
+ end
80
+
55
81
  def fetch_1000_companies(url)
56
82
  response = JSON.parse(RestClient.get(url).body)
57
83
  return response["results"], response["nextResultsUri"]
58
84
  end
59
85
 
86
+ def save_companies(companies, format)
87
+ case format
88
+ when 'csv'
89
+ CSV.open("companies.csv", "ab") do |csv|
90
+ companies.each do |company|
91
+ csv << [company["businessId"], company["companyForm"], company["name"], company["registrationDate"]]
92
+ end
93
+ end
94
+ else
95
+ logger.info "Unknown save format"
96
+ end
97
+ end
98
+
60
99
  # Returns a hash with all company data from YTJ
61
100
  def api_call(business_id)
62
101
  url = "#{YTJ_API_URL}#{business_id}"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ytj_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janne Warén
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-27 00:00:00.000000000 Z
11
+ date: 2017-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler