pupa 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e225ec9f62bb2c542da3f691b529cea5b6f99c15
4
- data.tar.gz: 07726b314755421a2eac3d46756c46cd483ab913
3
+ metadata.gz: 940e9e4a78d4b1940728bd15b6ee770f07ac213a
4
+ data.tar.gz: 2cbd50c8ef323d747a37e2a8970253ee3b028bd4
5
5
  SHA512:
6
- metadata.gz: 5732c1a25dcbcd7aaac28a049905c1fa2c6e4909f18e3b8c75b82e063264a871ec465b79aaa9c5371621b01f23ab4aa73135bb5757de6721e4521ec185d8e27a
7
- data.tar.gz: d93fca74f1f8276c44de1ef228b85659875e839ce4a06a54a17f507423186485276c03c0f2be921b7f39e081fb622931f13659b116d76f9a920502f53427d5e6
6
+ metadata.gz: f5698681774b52440270b76fad66d1916120b2768a588c1ac38bdf55979ffb8f76a3cb1b70900896cfdaa858668633e26b3dab868f9bc875109b1e202deca93e
7
+ data.tar.gz: 9f4e8a37eea18a0e18584081d25c050057da631e70b66ff1d1afa6f2a73394caf9cc5dfdac86cc4d53138601ee75921d4ee31f00a23fb1dbea5b38fcc77153cc
data/.travis.yml CHANGED
@@ -10,3 +10,5 @@ services:
10
10
  - mongodb
11
11
  - postgresql
12
12
  - redis
13
+ before_script:
14
+ - createdb pupa_test
@@ -29,9 +29,10 @@ module Pupa
29
29
  # @param [String] cache_dir a directory or a Memcached address
30
30
  # (e.g. `memcached://localhost:11211`) in which to cache requests
31
31
  # @param [Integer] expires_in the cache's expiration time in seconds
32
+ # @param [Integer,String] value_max_bytes the maximum Memcached item size
32
33
  # @param [String] level the log level
33
34
  # @return [Faraday::Connection] a configured Faraday HTTP client
34
- def self.new(cache_dir: nil, expires_in: 86400, level: 'INFO') # 1 day
35
+ def self.new(cache_dir: nil, expires_in: 86400, value_max_bytes: 1048576, level: 'INFO') # 1 day
35
36
  Faraday.new do |connection|
36
37
  connection.request :url_encoded
37
38
  connection.use Middleware::Logger, Logger.new('faraday', level: level)
@@ -58,7 +59,7 @@ module Pupa
58
59
  connection.response :caching do
59
60
  address = cache_dir[%r{\Amemcached://(.+)\z}, 1]
60
61
  if address
61
- ActiveSupport::Cache::MemCacheStore.new(address, expires_in: expires_in)
62
+ ActiveSupport::Cache::MemCacheStore.new(address, expires_in: expires_in, value_max_bytes: Integer(value_max_bytes)) # honor the caller's limit; the CLI supplies it as a String
62
63
  else
63
64
  ActiveSupport::Cache::FileStore.new(cache_dir, expires_in: expires_in)
64
65
  end
@@ -24,14 +24,15 @@ module Pupa
24
24
  # @param [String] cache_dir the directory or Memcached address
25
25
  # (e.g. `memcached://localhost:11211`) in which to cache HTTP responses
26
26
  # @param [Integer] expires_in the cache's expiration time in seconds
27
+ # @param [Integer,String] value_max_bytes the maximum Memcached item size
27
28
  # @param [String] database_url the database URL
28
29
  # @param [Boolean] validate whether to validate JSON documents
29
30
  # @param [String] level the log level
30
31
  # @param [String,IO] logdev the log device
31
32
  # @param [Hash] options criteria for selecting the methods to run
32
- def initialize(output_dir, pipelined: false, cache_dir: nil, expires_in: 86400, database_url: 'mongodb://localhost:27017/pupa', validate: true, level: 'INFO', logdev: STDOUT, options: {})
33
+ def initialize(output_dir, pipelined: false, cache_dir: nil, expires_in: 86400, value_max_bytes: 1048576, database_url: 'mongodb://localhost:27017/pupa', validate: true, level: 'INFO', logdev: STDOUT, options: {})
33
34
  @store = DocumentStore.new(output_dir, pipelined: pipelined)
34
- @client = Client.new(cache_dir: cache_dir, expires_in: expires_in, level: level)
35
+ @client = Client.new(cache_dir: cache_dir, expires_in: expires_in, value_max_bytes: value_max_bytes, level: level)
35
36
  @connection = Connection.new(database_url)
36
37
  @logger = Logger.new('pupa', level: level, logdev: logdev)
37
38
  @validate = validate
@@ -32,7 +32,7 @@ end
32
32
  #
33
33
  # Instead of adding a callback, we can override `to_h` when `persist` is `true`.
34
34
  ObjectSpace.each_object(Class) do |base|
35
- if base.include?(Pupa::Model)
35
+ if base != Sequel::Model && base.include?(Pupa::Model) # Sequel::Model will error on #include?
36
36
  base.class_eval do
37
37
  set_callback(:save, :before) do |object|
38
38
  object._type = object._type.camelize.demodulize.underscore
data/lib/pupa/runner.rb CHANGED
@@ -11,16 +11,17 @@ module Pupa
11
11
  @processor_class = processor_class
12
12
 
13
13
  @options = OpenStruct.new({
14
- actions: [],
15
- tasks: [],
16
- output_dir: File.expand_path('scraped_data', Dir.pwd),
17
- pipelined: false,
18
- cache_dir: File.expand_path('web_cache', Dir.pwd),
19
- expires_in: 86400, # 1 day
20
- database_url: 'mongodb://localhost:27017/pupa',
21
- validate: true,
22
- level: 'INFO',
23
- dry_run: false,
14
+ actions: [],
15
+ tasks: [],
16
+ output_dir: File.expand_path('scraped_data', Dir.pwd),
17
+ pipelined: false,
18
+ cache_dir: File.expand_path('web_cache', Dir.pwd),
19
+ expires_in: 86400, # 1 day
20
+ value_max_bytes: 1048576, # 1 MB
21
+ database_url: 'mongodb://localhost:27017/pupa',
22
+ validate: true,
23
+ level: 'INFO',
24
+ dry_run: false,
24
25
  }.merge(defaults))
25
26
 
26
27
  @actions = {
@@ -82,6 +83,9 @@ module Pupa
82
83
  opts.on('-e', '--expires_in SECONDS', "The cache's expiration time in seconds") do |v|
83
84
  options.expires_in = v
84
85
  end
86
+ opts.on('-value_max_bytes BYTES', "The maximum Memcached item size") do |v|
87
+ options.value_max_bytes = v
88
+ end
85
89
  opts.on('-d', '--database_url SCHEME://USERNAME:PASSWORD@HOST:PORT/DATABASE', 'The database URL') do |v|
86
90
  options.database_url = v
87
91
  end
@@ -142,6 +146,7 @@ module Pupa
142
146
  pipelined: options.pipelined,
143
147
  cache_dir: options.cache_dir,
144
148
  expires_in: options.expires_in,
149
+ value_max_bytes: options.value_max_bytes,
145
150
  database_url: options.database_url,
146
151
  validate: options.validate,
147
152
  level: options.level,
@@ -160,7 +165,7 @@ module Pupa
160
165
  end
161
166
 
162
167
  if options.level == 'DEBUG'
163
- %w(output_dir pipelined cache_dir expires_in database_url validate level).each do |option|
168
+ %w(output_dir pipelined cache_dir expires_in value_max_bytes database_url validate level).each do |option|
164
169
  puts "#{option}: #{options[option]}"
165
170
  end
166
171
  unless rest.empty?
data/lib/pupa/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pupa
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -14,7 +14,7 @@ describe Pupa::Processor::Connection::PostgreSQLAdapter do
14
14
  end
15
15
 
16
16
  before :all do
17
- connection.raw_connection.drop_table(:people)
17
+ connection.raw_connection.drop_table?(:people)
18
18
  connection.raw_connection.create_table(:people) do
19
19
  primary_key :id
20
20
  String :_id
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pupa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Open North
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-11 00:00:00.000000000 Z
11
+ date: 2014-05-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport