daimon_skycrawlers 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 01cf8887ce71dff448a5130eadb47e702643a042
4
- data.tar.gz: 280aca91923bbb0f40af2996074b1d2f32a7e5ed
3
+ metadata.gz: 3c9d264378336af1acadb1bec4dc2dba783d9089
4
+ data.tar.gz: c532cd7213889299f369ba2e8e682ea69a39172a
5
5
  SHA512:
6
- metadata.gz: 81fe7008849bb2ef7936e2b169671c275669692d77440b2ebada247870fc0a67d08ccad2b9c1c5255c253a90ae22a81c221c163f1dc1b007918f4cc3f90f89fe
7
- data.tar.gz: ea67f734ed38fb348d4385ec7b603b88385b2c9f5161ba4765980fa8e355e754b08b680106e2da5e75a4badf4a98287c8f1ecb0b12ce60f140bbeeeb4487bc0f
6
+ metadata.gz: 86bcf270ee971a26cd029ab9bd9372cfbe544e15640b3275e897e8b0ec2131976db379330a047ef5ea03bf9f568c111e083a135ae1fddd3d0ead38a373086e33
7
+ data.tar.gz: a9aef67c6bcb70488a1ce0c5e54ed565f65e7a5490b2e0b7db87feb2960801bb829467d991ae4fb3ab52ddef145d25315ba03ca1e2178256b5147b7b52a043a2
@@ -37,6 +37,9 @@ module DaimonSkycrawlers
37
37
  when headers.key?("etag") && page.etag
38
38
  headers["etag"] != page.etag
39
39
  when headers.key?("last-modified") && page.last_modified_at
40
+ if headers["last-modified"] < page.last_modified_at
41
+ log.warn("#{url} returns old contents. #{headers["last-modified"]} < #{page.last_modified_at}")
42
+ end
40
43
  headers["last-modified"] > page.last_modified_at
41
44
  else
42
45
  true
@@ -27,7 +27,7 @@ module DaimonSkycrawlers
27
27
  timestamps: true
28
28
  }
29
29
  invoke(MigrationGenerator, [
30
- "CreatePage",
30
+ "CreatePages",
31
31
  "url:string",
32
32
  "headers:text",
33
33
  "body:binary",
@@ -37,6 +37,22 @@ module DaimonSkycrawlers
37
37
  migration_options)
38
38
  end
39
39
 
40
+ def insert_index
41
+ Dir.glob(File.join(destination_root, name, "db/migrate/*_create_pages.rb")) do |entry|
42
+ source = File.read(entry)
43
+ replaced_source = source.gsub(/(^ +)t.timestamps$/) do |_match; indent|
44
+ indent = $1
45
+ <<-CODE.chomp
46
+ #{indent}t.timestamps
47
+
48
+ #{indent}t.index [:url]
49
+ #{indent}t.index [:url, :updated_at]
50
+ CODE
51
+ end
52
+ File.write(entry, replaced_source)
53
+ end
54
+ end
55
+
40
56
  def copy_files
41
57
  [
42
58
  "Gemfile",
@@ -9,7 +9,7 @@ module DaimonSkycrawlers
9
9
  class RDB < Base
10
10
  def initialize(config_path = "config/database.yml")
11
11
  super()
12
- Base.configurations = YAML.load_file(config_path)
12
+ Base.configurations = YAML.load(ERB.new(::File.read(config_path)).result)
13
13
  Base.establish_connection(DaimonSkycrawlers.env.to_sym)
14
14
  end
15
15
 
@@ -34,7 +34,7 @@ module DaimonSkycrawlers
34
34
  # @param [String] url identity of the page
35
35
  #
36
36
  def find(url)
37
- Page.where(url: url).order(last_modified_at: :desc).limit(1).first
37
+ Page.where(url: url).order(updated_at: :desc).limit(1).first
38
38
  end
39
39
 
40
40
  class Base < ActiveRecord::Base
@@ -43,7 +43,7 @@ Rake::Task.define_task("db:load_config") do
43
43
  config.migrations_paths = ["db/migrate"]
44
44
  config.fixtures_path = "test/fixtures"
45
45
  config.seed_loader = seed_loader.new
46
- config.database_configuration = YAML.load_file("config/database.yml")
46
+ config.database_configuration = YAML.load(ERB.new(::File.read("config/database.yml")).result)
47
47
  end
48
48
  environment = ENV["SKYCRAWLERS_ENV"] || "development"
49
49
  ActiveRecord::Base.configurations = ActiveRecord::Tasks::DatabaseTasks.database_configuration
@@ -1,3 +1,3 @@
1
1
  module DaimonSkycrawlers
2
- VERSION = "0.7.2"
2
+ VERSION = "0.8.0"
3
3
  end
@@ -1,9 +1,8 @@
1
1
  PATH
2
2
  remote: ../../
3
3
  specs:
4
- daimon_skycrawlers (0.6.0)
4
+ daimon_skycrawlers (0.7.2)
5
5
  activerecord
6
- bundler (~> 1.11)
7
6
  faraday
8
7
  faraday_middleware
9
8
  nokogiri
@@ -45,7 +44,7 @@ GEM
45
44
  amq-protocol (2.0.1)
46
45
  arel (7.1.4)
47
46
  builder (3.2.2)
48
- bunny (2.6.0)
47
+ bunny (2.6.1)
49
48
  amq-protocol (>= 2.0.1)
50
49
  concurrent-ruby (1.0.2)
51
50
  erubis (2.7.0)
@@ -89,8 +88,8 @@ GEM
89
88
  thread_safe (0.3.5)
90
89
  timers (4.1.1)
91
90
  hitimes
92
- typhoeus (0.8.0)
93
- ethon (>= 0.8.0)
91
+ typhoeus (1.1.0)
92
+ ethon (>= 0.9.0)
94
93
  tzinfo (1.2.2)
95
94
  thread_safe (~> 0.1)
96
95
  webrobots (0.1.2)
@@ -1,13 +1,15 @@
1
- class CreatePage < ActiveRecord::Migration[5.0]
1
+ class CreatePages < ActiveRecord::Migration[5.0]
2
2
  def change
3
3
  create_table :pages do |t|
4
- t.string :url
4
+ t.string :url, index: true
5
5
  t.text :headers
6
6
  t.binary :body
7
7
  t.datetime :last_modified_at
8
8
  t.string :etag
9
9
 
10
10
  t.timestamps
11
+
12
+ t.index [:url, :updated_at]
11
13
  end
12
14
  end
13
15
  end
@@ -23,6 +23,8 @@ ActiveRecord::Schema.define(version: 20161018044144) do
23
23
  t.string "etag"
24
24
  t.datetime "created_at", null: false
25
25
  t.datetime "updated_at", null: false
26
+ t.index ["url", "updated_at"], name: "index_pages_on_url_and_updated_at", using: :btree
27
+ t.index ["url"], name: "index_pages_on_url", using: :btree
26
28
  end
27
29
 
28
30
  end
@@ -1,13 +1,15 @@
1
1
  class CreatePages < ActiveRecord::Migration
2
2
  def change
3
3
  create_table :pages do |t|
4
- t.string :url
4
+ t.string :url, index: true
5
5
  t.text :headers
6
6
  t.binary :body, limit: 10 * 1024 ** 2 # 10MiB
7
7
  t.datetime :last_modified_at
8
8
  t.string :etag
9
9
 
10
10
  t.timestamps null: false
11
+
12
+ t.index [:url, :updated_at]
11
13
  end
12
14
  end
13
15
  end
@@ -23,6 +23,8 @@ ActiveRecord::Schema.define(version: 20160830155803) do
23
23
  t.string "etag"
24
24
  t.datetime "created_at", null: false
25
25
  t.datetime "updated_at", null: false
26
+ t.index ["url", "updated_at"], name: "index_pages_on_url_and_updated_at", using: :btree
27
+ t.index ["url"], name: "index_pages_on_url", using: :btree
26
28
  end
27
29
 
28
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daimon_skycrawlers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - daimon developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-25 00:00:00.000000000 Z
11
+ date: 2016-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -363,7 +363,7 @@ files:
363
363
  - sample/itp-crawler/config/database.yml
364
364
  - sample/itp-crawler/config/database_itp.yml
365
365
  - sample/itp-crawler/config/init.rb
366
- - sample/itp-crawler/db/migrate/20161018044144_create_page.rb
366
+ - sample/itp-crawler/db/migrate/20161018044144_create_pages.rb
367
367
  - sample/itp-crawler/db/schema.rb
368
368
  - sample/itp-crawler/db_itp/migrate/20161020044144_create_shop.rb
369
369
  - sample/itp-crawler/db_itp/schema.rb