daimon_skycrawlers 0.7.2 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 01cf8887ce71dff448a5130eadb47e702643a042
4
- data.tar.gz: 280aca91923bbb0f40af2996074b1d2f32a7e5ed
3
+ metadata.gz: 3c9d264378336af1acadb1bec4dc2dba783d9089
4
+ data.tar.gz: c532cd7213889299f369ba2e8e682ea69a39172a
5
5
  SHA512:
6
- metadata.gz: 81fe7008849bb2ef7936e2b169671c275669692d77440b2ebada247870fc0a67d08ccad2b9c1c5255c253a90ae22a81c221c163f1dc1b007918f4cc3f90f89fe
7
- data.tar.gz: ea67f734ed38fb348d4385ec7b603b88385b2c9f5161ba4765980fa8e355e754b08b680106e2da5e75a4badf4a98287c8f1ecb0b12ce60f140bbeeeb4487bc0f
6
+ metadata.gz: 86bcf270ee971a26cd029ab9bd9372cfbe544e15640b3275e897e8b0ec2131976db379330a047ef5ea03bf9f568c111e083a135ae1fddd3d0ead38a373086e33
7
+ data.tar.gz: a9aef67c6bcb70488a1ce0c5e54ed565f65e7a5490b2e0b7db87feb2960801bb829467d991ae4fb3ab52ddef145d25315ba03ca1e2178256b5147b7b52a043a2
@@ -37,6 +37,9 @@ module DaimonSkycrawlers
37
37
  when headers.key?("etag") && page.etag
38
38
  headers["etag"] != page.etag
39
39
  when headers.key?("last-modified") && page.last_modified_at
40
+ if headers["last-modified"] < page.last_modified_at
41
+ log.warn("#{url} returns old contents. #{headers["last-modified"]} < #{page.last_modified_at}")
42
+ end
40
43
  headers["last-modified"] > page.last_modified_at
41
44
  else
42
45
  true
@@ -27,7 +27,7 @@ module DaimonSkycrawlers
27
27
  timestamps: true
28
28
  }
29
29
  invoke(MigrationGenerator, [
30
- "CreatePage",
30
+ "CreatePages",
31
31
  "url:string",
32
32
  "headers:text",
33
33
  "body:binary",
@@ -37,6 +37,22 @@ module DaimonSkycrawlers
37
37
  migration_options)
38
38
  end
39
39
 
40
+ def insert_index
41
+ Dir.glob(File.join(destination_root, name, "db/migrate/*_create_pages.rb")) do |entry|
42
+ source = File.read(entry)
43
+ replaced_source = source.gsub(/(^ +)t.timestamps$/) do |_match; indent|
44
+ indent = $1
45
+ <<-CODE.chomp
46
+ #{indent}t.timestamps
47
+
48
+ #{indent}t.index [:url]
49
+ #{indent}t.index [:url, :updated_at]
50
+ CODE
51
+ end
52
+ File.write(entry, replaced_source)
53
+ end
54
+ end
55
+
40
56
  def copy_files
41
57
  [
42
58
  "Gemfile",
@@ -9,7 +9,7 @@ module DaimonSkycrawlers
9
9
  class RDB < Base
10
10
  def initialize(config_path = "config/database.yml")
11
11
  super()
12
- Base.configurations = YAML.load_file(config_path)
12
+ Base.configurations = YAML.load(ERB.new(::File.read(config_path)).result)
13
13
  Base.establish_connection(DaimonSkycrawlers.env.to_sym)
14
14
  end
15
15
 
@@ -34,7 +34,7 @@ module DaimonSkycrawlers
34
34
  # @param [String] url identity of the page
35
35
  #
36
36
  def find(url)
37
- Page.where(url: url).order(last_modified_at: :desc).limit(1).first
37
+ Page.where(url: url).order(updated_at: :desc).limit(1).first
38
38
  end
39
39
 
40
40
  class Base < ActiveRecord::Base
@@ -43,7 +43,7 @@ Rake::Task.define_task("db:load_config") do
43
43
  config.migrations_paths = ["db/migrate"]
44
44
  config.fixtures_path = "test/fixtures"
45
45
  config.seed_loader = seed_loader.new
46
- config.database_configuration = YAML.load_file("config/database.yml")
46
+ config.database_configuration = YAML.load(ERB.new(::File.read("config/database.yml")).result)
47
47
  end
48
48
  environment = ENV["SKYCRAWLERS_ENV"] || "development"
49
49
  ActiveRecord::Base.configurations = ActiveRecord::Tasks::DatabaseTasks.database_configuration
@@ -1,3 +1,3 @@
1
1
  module DaimonSkycrawlers
2
- VERSION = "0.7.2"
2
+ VERSION = "0.8.0"
3
3
  end
@@ -1,9 +1,8 @@
1
1
  PATH
2
2
  remote: ../../
3
3
  specs:
4
- daimon_skycrawlers (0.6.0)
4
+ daimon_skycrawlers (0.7.2)
5
5
  activerecord
6
- bundler (~> 1.11)
7
6
  faraday
8
7
  faraday_middleware
9
8
  nokogiri
@@ -45,7 +44,7 @@ GEM
45
44
  amq-protocol (2.0.1)
46
45
  arel (7.1.4)
47
46
  builder (3.2.2)
48
- bunny (2.6.0)
47
+ bunny (2.6.1)
49
48
  amq-protocol (>= 2.0.1)
50
49
  concurrent-ruby (1.0.2)
51
50
  erubis (2.7.0)
@@ -89,8 +88,8 @@ GEM
89
88
  thread_safe (0.3.5)
90
89
  timers (4.1.1)
91
90
  hitimes
92
- typhoeus (0.8.0)
93
- ethon (>= 0.8.0)
91
+ typhoeus (1.1.0)
92
+ ethon (>= 0.9.0)
94
93
  tzinfo (1.2.2)
95
94
  thread_safe (~> 0.1)
96
95
  webrobots (0.1.2)
@@ -1,13 +1,15 @@
1
- class CreatePage < ActiveRecord::Migration[5.0]
1
+ class CreatePages < ActiveRecord::Migration[5.0]
2
2
  def change
3
3
  create_table :pages do |t|
4
- t.string :url
4
+ t.string :url, index: true
5
5
  t.text :headers
6
6
  t.binary :body
7
7
  t.datetime :last_modified_at
8
8
  t.string :etag
9
9
 
10
10
  t.timestamps
11
+
12
+ t.index [:url, :updated_at]
11
13
  end
12
14
  end
13
15
  end
@@ -23,6 +23,8 @@ ActiveRecord::Schema.define(version: 20161018044144) do
23
23
  t.string "etag"
24
24
  t.datetime "created_at", null: false
25
25
  t.datetime "updated_at", null: false
26
+ t.index ["url", "updated_at"], name: "index_pages_on_url_and_updated_at", using: :btree
27
+ t.index ["url"], name: "index_pages_on_url", using: :btree
26
28
  end
27
29
 
28
30
  end
@@ -1,13 +1,15 @@
1
1
  class CreatePages < ActiveRecord::Migration
2
2
  def change
3
3
  create_table :pages do |t|
4
- t.string :url
4
+ t.string :url, index: true
5
5
  t.text :headers
6
6
  t.binary :body, limit: 10 * 1024 ** 2 # 10MiB
7
7
  t.datetime :last_modified_at
8
8
  t.string :etag
9
9
 
10
10
  t.timestamps null: false
11
+
12
+ t.index [:url, :updated_at]
11
13
  end
12
14
  end
13
15
  end
@@ -23,6 +23,8 @@ ActiveRecord::Schema.define(version: 20160830155803) do
23
23
  t.string "etag"
24
24
  t.datetime "created_at", null: false
25
25
  t.datetime "updated_at", null: false
26
+ t.index ["url", "updated_at"], name: "index_pages_on_url_and_updated_at", using: :btree
27
+ t.index ["url"], name: "index_pages_on_url", using: :btree
26
28
  end
27
29
 
28
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daimon_skycrawlers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - daimon developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-25 00:00:00.000000000 Z
11
+ date: 2016-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -363,7 +363,7 @@ files:
363
363
  - sample/itp-crawler/config/database.yml
364
364
  - sample/itp-crawler/config/database_itp.yml
365
365
  - sample/itp-crawler/config/init.rb
366
- - sample/itp-crawler/db/migrate/20161018044144_create_page.rb
366
+ - sample/itp-crawler/db/migrate/20161018044144_create_pages.rb
367
367
  - sample/itp-crawler/db/schema.rb
368
368
  - sample/itp-crawler/db_itp/migrate/20161020044144_create_shop.rb
369
369
  - sample/itp-crawler/db_itp/schema.rb