pupa 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e225ec9f62bb2c542da3f691b529cea5b6f99c15
4
- data.tar.gz: 07726b314755421a2eac3d46756c46cd483ab913
3
+ metadata.gz: 940e9e4a78d4b1940728bd15b6ee770f07ac213a
4
+ data.tar.gz: 2cbd50c8ef323d747a37e2a8970253ee3b028bd4
5
5
  SHA512:
6
- metadata.gz: 5732c1a25dcbcd7aaac28a049905c1fa2c6e4909f18e3b8c75b82e063264a871ec465b79aaa9c5371621b01f23ab4aa73135bb5757de6721e4521ec185d8e27a
7
- data.tar.gz: d93fca74f1f8276c44de1ef228b85659875e839ce4a06a54a17f507423186485276c03c0f2be921b7f39e081fb622931f13659b116d76f9a920502f53427d5e6
6
+ metadata.gz: f5698681774b52440270b76fad66d1916120b2768a588c1ac38bdf55979ffb8f76a3cb1b70900896cfdaa858668633e26b3dab868f9bc875109b1e202deca93e
7
+ data.tar.gz: 9f4e8a37eea18a0e18584081d25c050057da631e70b66ff1d1afa6f2a73394caf9cc5dfdac86cc4d53138601ee75921d4ee31f00a23fb1dbea5b38fcc77153cc
data/.travis.yml CHANGED
@@ -10,3 +10,5 @@ services:
10
10
  - mongodb
11
11
  - postgresql
12
12
  - redis
13
+ before_script:
14
+ - createdb pupa_test
@@ -29,9 +29,10 @@ module Pupa
29
29
  # @param [String] cache_dir a directory or a Memcached address
30
30
  # (e.g. `memcached://localhost:11211`) in which to cache requests
31
31
  # @param [Integer] expires_in the cache's expiration time in seconds
32
+ # @param [Integer,String] value_max_bytes the maximum Memcached item size
32
33
  # @param [String] level the log level
33
34
  # @return [Faraday::Connection] a configured Faraday HTTP client
34
- def self.new(cache_dir: nil, expires_in: 86400, level: 'INFO') # 1 day
35
+ def self.new(cache_dir: nil, expires_in: 86400, value_max_bytes: 1048576, level: 'INFO') # 1 day
35
36
  Faraday.new do |connection|
36
37
  connection.request :url_encoded
37
38
  connection.use Middleware::Logger, Logger.new('faraday', level: level)
@@ -58,7 +59,7 @@ module Pupa
58
59
  connection.response :caching do
59
60
  address = cache_dir[%r{\Amemcached://(.+)\z}, 1]
60
61
  if address
61
- ActiveSupport::Cache::MemCacheStore.new(address, expires_in: expires_in)
62
+ ActiveSupport::Cache::MemCacheStore.new(address, expires_in: expires_in, value_max_bytes: 1048576)
62
63
  else
63
64
  ActiveSupport::Cache::FileStore.new(cache_dir, expires_in: expires_in)
64
65
  end
@@ -24,14 +24,15 @@ module Pupa
24
24
  # @param [String] cache_dir the directory or Memcached address
25
25
  # (e.g. `memcached://localhost:11211`) in which to cache HTTP responses
26
26
  # @param [Integer] expires_in the cache's expiration time in seconds
27
+ # @param [Integer,String] value_max_bytes the maximum Memcached item size
27
28
  # @param [String] database_url the database URL
28
29
  # @param [Boolean] validate whether to validate JSON documents
29
30
  # @param [String] level the log level
30
31
  # @param [String,IO] logdev the log device
31
32
  # @param [Hash] options criteria for selecting the methods to run
32
- def initialize(output_dir, pipelined: false, cache_dir: nil, expires_in: 86400, database_url: 'mongodb://localhost:27017/pupa', validate: true, level: 'INFO', logdev: STDOUT, options: {})
33
+ def initialize(output_dir, pipelined: false, cache_dir: nil, expires_in: 86400, value_max_bytes: 1048576, database_url: 'mongodb://localhost:27017/pupa', validate: true, level: 'INFO', logdev: STDOUT, options: {})
33
34
  @store = DocumentStore.new(output_dir, pipelined: pipelined)
34
- @client = Client.new(cache_dir: cache_dir, expires_in: expires_in, level: level)
35
+ @client = Client.new(cache_dir: cache_dir, expires_in: expires_in, value_max_bytes: value_max_bytes, level: level)
35
36
  @connection = Connection.new(database_url)
36
37
  @logger = Logger.new('pupa', level: level, logdev: logdev)
37
38
  @validate = validate
@@ -32,7 +32,7 @@ end
32
32
  #
33
33
  # Instead of adding a callback, we can override `to_h` when `persist` is `true`.
34
34
  ObjectSpace.each_object(Class) do |base|
35
- if base.include?(Pupa::Model)
35
+ if base != Sequel::Model && base.include?(Pupa::Model) # Sequel::Model will error on #include?
36
36
  base.class_eval do
37
37
  set_callback(:save, :before) do |object|
38
38
  object._type = object._type.camelize.demodulize.underscore
data/lib/pupa/runner.rb CHANGED
@@ -11,16 +11,17 @@ module Pupa
11
11
  @processor_class = processor_class
12
12
 
13
13
  @options = OpenStruct.new({
14
- actions: [],
15
- tasks: [],
16
- output_dir: File.expand_path('scraped_data', Dir.pwd),
17
- pipelined: false,
18
- cache_dir: File.expand_path('web_cache', Dir.pwd),
19
- expires_in: 86400, # 1 day
20
- database_url: 'mongodb://localhost:27017/pupa',
21
- validate: true,
22
- level: 'INFO',
23
- dry_run: false,
14
+ actions: [],
15
+ tasks: [],
16
+ output_dir: File.expand_path('scraped_data', Dir.pwd),
17
+ pipelined: false,
18
+ cache_dir: File.expand_path('web_cache', Dir.pwd),
19
+ expires_in: 86400, # 1 day
20
+ value_max_bytes: 1048576, # 1 MB
21
+ database_url: 'mongodb://localhost:27017/pupa',
22
+ validate: true,
23
+ level: 'INFO',
24
+ dry_run: false,
24
25
  }.merge(defaults))
25
26
 
26
27
  @actions = {
@@ -82,6 +83,9 @@ module Pupa
82
83
  opts.on('-e', '--expires_in SECONDS', "The cache's expiration time in seconds") do |v|
83
84
  options.expires_in = v
84
85
  end
86
+ opts.on('-value_max_bytes BYTES', "The maximum Memcached item size") do |v|
87
+ options.value_max_bytes = v
88
+ end
85
89
  opts.on('-d', '--database_url SCHEME://USERNAME:PASSWORD@HOST:PORT/DATABASE', 'The database URL') do |v|
86
90
  options.database_url = v
87
91
  end
@@ -142,6 +146,7 @@ module Pupa
142
146
  pipelined: options.pipelined,
143
147
  cache_dir: options.cache_dir,
144
148
  expires_in: options.expires_in,
149
+ value_max_bytes: options.value_max_bytes,
145
150
  database_url: options.database_url,
146
151
  validate: options.validate,
147
152
  level: options.level,
@@ -160,7 +165,7 @@ module Pupa
160
165
  end
161
166
 
162
167
  if options.level == 'DEBUG'
163
- %w(output_dir pipelined cache_dir expires_in database_url validate level).each do |option|
168
+ %w(output_dir pipelined cache_dir expires_in value_max_bytes database_url validate level).each do |option|
164
169
  puts "#{option}: #{options[option]}"
165
170
  end
166
171
  unless rest.empty?
data/lib/pupa/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pupa
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -14,7 +14,7 @@ describe Pupa::Processor::Connection::PostgreSQLAdapter do
14
14
  end
15
15
 
16
16
  before :all do
17
- connection.raw_connection.drop_table(:people)
17
+ connection.raw_connection.drop_table?(:people)
18
18
  connection.raw_connection.create_table(:people) do
19
19
  primary_key :id
20
20
  String :_id
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pupa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Open North
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-11 00:00:00.000000000 Z
11
+ date: 2014-05-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport