pupa 0.0.12 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c894758e0375a999ec78a825feb05ff7ec9da66a
4
- data.tar.gz: 4f2633cc09888ef1f236cb2c35afdff9681e7924
3
+ metadata.gz: 37fa9e87e20d4fef24b1f028f046b695a3323b88
4
+ data.tar.gz: 0b2ae08ac3597955e7175812f05a700c76d91247
5
5
  SHA512:
6
- metadata.gz: 7e83f57b09cf99dab424032634193aa17055c22721b5ee558362f5fc07face6d6129b71d431e01f0b2b663aa771f752014ce7129e963d44d47beaa154f770c44
7
- data.tar.gz: a51e3cee53727013f60623576af75b7c0ce7c047085be577a5f745fed04babc62ffd64a3c59abf234b909ffca47bc73250645f39eaae0364a89ad131f2593426
6
+ metadata.gz: de98e5ed8f0b145e0ba77401c3e2e983cd47dc2b7a327476a03b6e04eecaeb31f976d76e98a7d51114afd87a95e36c9da491e6a4f4d788f71b02060121cbb94e
7
+ data.tar.gz: 69fe22ea3079034b34aea5b244d233f66eebefd5010ea923ca9865d475bc81ccaebd6cab94e721ff2944cef3b4842dc297c932e8cbc69d949a593a74b2fc46a3
@@ -1,6 +1,7 @@
1
1
  language: ruby
2
2
  rvm:
3
3
  - 2.0.0
4
+ - 2.1.0
4
5
  env:
5
6
  - MODE=default
6
7
  - MODE=compat
data/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  [![Coverage Status](https://coveralls.io/repos/opennorth/pupa-ruby/badge.png?branch=master)](https://coveralls.io/r/opennorth/pupa-ruby)
6
6
  [![Code Climate](https://codeclimate.com/github/opennorth/pupa-ruby.png)](https://codeclimate.com/github/opennorth/pupa-ruby)
7
7
 
8
- Pupa.rb is a Ruby 2.0 fork of Sunlight Labs' [Pupa](https://github.com/opencivicdata/pupa). It implements an Extract, Transform and Load (ETL) process to scrape data from online sources, transform it, and write it to a database.
8
+ Pupa.rb is a Ruby 2.x fork of Sunlight Labs' [Pupa](https://github.com/opencivicdata/pupa). It implements an Extract, Transform and Load (ETL) process to scrape data from online sources, transform it, and write it to a database.
9
9
 
10
10
  ## What it tries to solve
11
11
 
@@ -187,6 +187,10 @@ The `json-schema` gem is slow compared to, for example, [JSV](https://github.com
187
187
 
188
188
  The [pupa-validate](https://npmjs.org/package/pupa-validate) npm package can be used to validate JSON documents using the faster JSV. In one example, using JSV instead of the `json-schema` gem halved the time needed to validate 10,000 documents.
189
189
 
190
+ ### Ruby version
191
+
192
+ Pupa.rb requires Ruby 2.x. If you have already made all the above optimizations, you may notice a significant improvement by using Ruby 2.1, which has better garbage collection than Ruby 2.0.
193
+
190
194
  ### Profiling
191
195
 
192
196
  You can profile your code using [perftools.rb](https://github.com/tmm1/perftools.rb). First, install the gem:
@@ -46,18 +46,6 @@ module Pupa
46
46
  # @param [String,Hash] params query string parameters
47
47
  # @return a parsed document
48
48
  def get(url, params = {})
49
- # Faraday requires `params` to be a hash.
50
- if String === params
51
- params = CGI.parse(params)
52
-
53
- # Flatten the parameters for Faraday.
54
- params.each do |key,value|
55
- if Array === value && value.size == 1
56
- params[key] = value.first
57
- end
58
- end
59
- end
60
-
61
49
  client.get(url, params).body
62
50
  end
63
51
 
@@ -2,10 +2,12 @@ require 'active_support/cache'
2
2
  require 'faraday_middleware'
3
3
  require 'faraday_middleware/response_middleware'
4
4
 
5
+ require 'pupa/processor/middleware/gzip'
5
6
  require 'pupa/processor/middleware/logger'
6
7
  require 'pupa/processor/middleware/parse_html'
7
8
  require 'pupa/processor/middleware/parse_json'
8
9
  require 'pupa/processor/middleware/raise_error'
10
+ require 'pupa/refinements/faraday'
9
11
  require 'pupa/refinements/faraday_middleware'
10
12
 
11
13
  begin
@@ -49,6 +51,9 @@ module Pupa
49
51
  connection.use FaradayMiddleware::ParseXml, preserve_raw: true, content_type: /\bxml$/
50
52
  end
51
53
 
54
+ # Must come after the parser middlewares.
55
+ connection.use Middleware::Gzip
56
+
52
57
  if cache_dir
53
58
  connection.response :caching do
54
59
  address = cache_dir[%r{\Amemcached://(.+)\z}, 1]
@@ -0,0 +1,24 @@
1
+ module Pupa
2
+ class Processor
3
+ module Middleware
4
+ # A Faraday response middleware for decompressing gzip and deflate responses.
5
+ #
6
+ # @see https://gist.github.com/romanbsd/3892387
7
+ class Gzip < Faraday::Response::Middleware
8
+ dependency 'zlib'
9
+
10
+ def on_complete(env)
11
+ encoding = env[:response_headers]['content-encoding'].to_s.downcase
12
+ case encoding
13
+ when 'gzip'
14
+ env[:body] = Zlib::GzipReader.new(StringIO.new(env[:body]), encoding: 'ASCII-8BIT').read
15
+ env[:response_headers].delete('content-encoding')
16
+ when 'deflate'
17
+ env[:body] = Zlib::Inflate.inflate(env[:body])
18
+ env[:response_headers].delete('content-encoding')
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,26 @@
1
+ module Pupa
2
+ module Refinements
3
+ # Faraday requires `params` to be a hash.
4
+ module Connection
5
+ # @see https://github.com/lostisland/faraday/blob/b8d90a59bafb8dd6e19488fae07945a7700f5664/lib/faraday/connection.rb#L137
6
+ def get(url=nil, params=nil, headers=nil, &block)
7
+ if String === params
8
+ params = CGI.parse(params)
9
+
10
+ # Flatten the parameters for Faraday.
11
+ params.each do |key,value|
12
+ if Array === value && value.size == 1
13
+ params[key] = value.first
14
+ end
15
+ end
16
+ end
17
+
18
+ super(url, params, headers, &block)
19
+ end
20
+ end
21
+ end
22
+ end
23
+
24
+ class Faraday::Connection
25
+ prepend Pupa::Refinements::Connection
26
+ end
@@ -1,3 +1,3 @@
1
1
  module Pupa
2
- VERSION = "0.0.12"
2
+ VERSION = "0.0.13"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pupa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Open North
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-29 00:00:00.000000000 Z
11
+ date: 2014-02-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -302,12 +302,14 @@ files:
302
302
  - lib/pupa/processor/document_store/file_store.rb
303
303
  - lib/pupa/processor/document_store/redis_store.rb
304
304
  - lib/pupa/processor/helper.rb
305
+ - lib/pupa/processor/middleware/gzip.rb
305
306
  - lib/pupa/processor/middleware/logger.rb
306
307
  - lib/pupa/processor/middleware/parse_html.rb
307
308
  - lib/pupa/processor/middleware/parse_json.rb
308
309
  - lib/pupa/processor/middleware/raise_error.rb
309
310
  - lib/pupa/processor/persistence.rb
310
311
  - lib/pupa/processor/yielder.rb
312
+ - lib/pupa/refinements/faraday.rb
311
313
  - lib/pupa/refinements/faraday_middleware.rb
312
314
  - lib/pupa/refinements/json-schema.rb
313
315
  - lib/pupa/refinements/opencivicdata.rb