kudzu-adapter-active_record 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b41da782783d660d5366be6d2c235d50722f8574
4
- data.tar.gz: f41f69bf3555773358301b07e9b6832c36424a79
3
+ metadata.gz: 72bd4ed5f1239eb982c765dae9fef82981e0dd9d
4
+ data.tar.gz: 4e81c7004721352b63e4f297f85c2947b7edd76a
5
5
  SHA512:
6
- metadata.gz: 1e3f5cc57bb822445c288c3a999614a18811ae3e835205b44295c8ac38b804327254436bb85236545822de1e097e20af35971dccb8cfd7bc09ffcf80f3ff86a5
7
- data.tar.gz: d8626598ce2b61a69eb75f3d897c0b881847a2745bd36db63cb2443b6e5f24300c6e08562c219556c824c932f548ca65fece1303cf4be581771c71aa528eda26
6
+ metadata.gz: 6b2049943de8e5fd38c3d96681e33d1686c59b9908ba50748092c6935e88d8e10d47be18360cc7d5ee7fb9365cbe58dd7af714151eb106a2f56959b19be7342b
7
+ data.tar.gz: 94defaf96e652e733249da1372cd85a683d08214efd4307b88ec98302ddddbd1c93668e0b4c4e9e1ed65b1f1d42a96937b7cd656dad19e220a10f868d670b173
data/README.md CHANGED
@@ -4,7 +4,7 @@ ActiveRecord adapter for kudzu crawler.
4
4
 
5
5
  ## Dependencies
6
6
 
7
- * kudzu 1.0+
7
+ * kudzu 1.1+
8
8
  * activerecord 5.0+
9
9
 
10
10
  ## Installation
@@ -30,7 +30,7 @@ Migrate into your application:
30
30
  This migration creates following tables:
31
31
 
32
32
  * kudzu_pages
33
- * kudzu_contents
33
+ * kudzu_chunks
34
34
  * kudzu_links
35
35
 
36
36
  ## Usage
data/lib/generators/kudzu/adapter/active_record/migration_generator.rb CHANGED
@@ -9,7 +9,7 @@ module Kudzu
9
9
  def create
10
10
  @migration_version = migration_version
11
11
  timestamp = Time.now.utc.strftime("%Y%m%d%H%M%S").to_i
12
- ["create_kudzu_pages", "create_kudzu_contents", "create_kudzu_links"].each_with_index do |filename, i|
12
+ ["create_kudzu_pages", "create_kudzu_chunks", "create_kudzu_links"].each_with_index do |filename, i|
13
13
  timestamp += i
14
14
  template "#{filename}.rb.erb", "db/migrate/#{timestamp}_#{filename}.rb"
15
15
  end
data/lib/generators/kudzu/adapter/active_record/templates/create_kudzu_chunks.rb.erb ADDED
@@ -0,0 +1,9 @@
1
+ class CreateKudzuChunks < ActiveRecord::Migration<%= @migration_version %>
2
+ def change
3
+ create_table :kudzu_chunks do |t|
4
+ t.references :page
5
+ t.binary :data
6
+ t.timestamps null: false
7
+ end
8
+ end
9
+ end
data/lib/generators/kudzu/adapter/active_record/templates/create_kudzu_pages.rb.erb CHANGED
@@ -13,8 +13,6 @@ class CreateKudzuPages < ActiveRecord::Migration<%= @migration_version %>
13
13
  t.text :redirect_from
14
14
  t.datetime :fetched_at
15
15
  t.datetime :revised_at
16
- t.integer :revisit_interval
17
- t.datetime :revisit_at
18
16
  t.timestamps null: false
19
17
 
20
18
  t.index :url, length: 32
data/lib/kudzu/adapter/active_record.rb CHANGED
@@ -1,19 +1,28 @@
1
1
  require 'activerecord-import'
2
2
  require 'kudzu'
3
3
 
4
+ if defined? Rails
5
+ require_relative 'active_record/railtie'
6
+ else
7
+ require_relative 'active_record/all'
8
+ end
9
+
4
10
  module Kudzu
5
11
  module Adapter
6
12
  module ActiveRecord
13
+ class << self
14
+ @@chunk_size = 5*(1024**2)
15
+
16
+ def chunk_size
17
+ @@chunk_size
18
+ end
19
+
20
+ def chunk_size=(val)
21
+ @@chunk_size = val
22
+ end
23
+ end
7
24
  end
8
25
  end
9
26
  end
10
27
 
11
28
  Kudzu.adapter = Kudzu::Adapter::ActiveRecord
12
-
13
- if defined? Railtie
14
- ActiveSupport.on_load :active_record do
15
- require_relative 'active_record/all'
16
- end
17
- else
18
- require_relative 'active_record/all'
19
- end
data/lib/kudzu/adapter/active_record/all.rb CHANGED
@@ -1,10 +1,6 @@
1
1
  require_relative 'model/base'
2
2
  require_relative 'model/page'
3
- require_relative 'model/content'
3
+ require_relative 'model/chunk'
4
4
  require_relative 'model/link'
5
5
  require_relative 'frontier'
6
6
  require_relative 'repository'
7
-
8
- Kudzu::Page = Kudzu::Adapter::ActiveRecord::Page
9
- Kudzu::Link = Kudzu::Adapter::ActiveRecord::Link
10
- Kudzu::Content = Kudzu::Adapter::ActiveRecord::Content
data/lib/kudzu/adapter/active_record/model/chunk.rb ADDED
@@ -0,0 +1,13 @@
1
+ module Kudzu
2
+ module Adapter
3
+ module ActiveRecord
4
+ class Chunk < Base
5
+ belongs_to :page
6
+
7
+ scope :select_without_data, -> { select(column_names - %w(data)) }
8
+ end
9
+ end
10
+ end
11
+
12
+ Chunk = Adapter::ActiveRecord::Chunk
13
+ end
data/lib/kudzu/adapter/active_record/model/link.rb CHANGED
@@ -2,8 +2,10 @@ module Kudzu
2
2
  module Adapter
3
3
  module ActiveRecord
4
4
  class Link < Base
5
- include Kudzu::Adapter::Base::Link
5
+ include Kudzu::Model::Link
6
6
  end
7
7
  end
8
8
  end
9
+
10
+ Link = Adapter::ActiveRecord::Link
9
11
  end
data/lib/kudzu/adapter/active_record/model/page.rb CHANGED
@@ -2,9 +2,9 @@ module Kudzu
2
2
  module Adapter
3
3
  module ActiveRecord
4
4
  class Page < Base
5
- include Kudzu::Adapter::Base::Page
5
+ include Kudzu::Model::Page
6
6
 
7
- has_one :content, dependent: :destroy
7
+ has_many :chunks, -> { order(id: :asc) }, dependent: :delete_all
8
8
 
9
9
  def response_header
10
10
  if response_header_column_is_text?
@@ -22,6 +22,10 @@ module Kudzu
22
22
  end
23
23
  end
24
24
 
25
+ def data
26
+ chunks.pluck(:data).join
27
+ end
28
+
25
29
  private
26
30
 
27
31
  def response_header_column_is_text?
@@ -31,4 +35,6 @@ module Kudzu
31
35
  end
32
36
  end
33
37
  end
38
+
39
+ Page = Adapter::ActiveRecord::Page
34
40
  end
data/lib/kudzu/adapter/active_record/railtie.rb ADDED
@@ -0,0 +1,18 @@
1
+ module Kudzu
2
+ module Adapter
3
+ module ActiveRecord
4
+ class Railtie < Rails::Railtie
5
+ ActiveSupport.on_load :active_record do
6
+ require_relative 'all'
7
+ end
8
+
9
+ config.after_initialize do
10
+ Dir.glob(Rails.root + 'app/decorators/kudzu/**/*_decorator*.rb').each do |c|
11
+ require_dependency(c)
12
+ end
13
+ Kudzu.logger = Rails.logger
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
data/lib/kudzu/adapter/active_record/repository.rb CHANGED
@@ -3,20 +3,45 @@ module Kudzu
3
3
  module ActiveRecord
4
4
  class Repository
5
5
  def find_by_url(url)
6
- Page.where(url: url).first_or_initialize
6
+ Page.find_or_initialize_by(url: url)
7
7
  end
8
8
 
9
9
  def register(page)
10
- if page.body
11
- content = page.content || page.build_content
12
- content.data = page.body
10
+ ActiveRecord::Base.transaction do
11
+ page.save
12
+ save_chunks(page) if page.body
13
13
  end
14
- page.save
15
14
  end
16
15
 
17
16
  def delete(page)
18
17
  page.destroy if page
19
18
  end
19
+
20
+ private
21
+
22
+ def save_chunks(page)
23
+ chunk_num = 0
24
+ each_chunk(page.body, ActiveRecord.chunk_size) do |chunked, i|
25
+ chunk = page.chunks.select_without_data.offset(i).limit(1).first_or_initialize
26
+ chunk.data = chunked
27
+ chunk.save
28
+ chunk_num = i + 1
29
+ end
30
+
31
+ if page.chunks.count - chunk_num > 0
32
+ page.chunks.select_without_data.offset(chunk_num).each(&:delete)
33
+ end
34
+ end
35
+
36
+ def each_chunk(body, size)
37
+ pos = 0
38
+ i = 0
39
+ while (chunked = body.byteslice(pos, size))
40
+ yield chunked, i
41
+ pos += size
42
+ i += 1
43
+ end
44
+ end
20
45
  end
21
46
  end
22
47
  end
data/lib/kudzu/adapter/active_record/version.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Kudzu
2
2
  module Adapter
3
3
  module ActiveRecord
4
- VERSION = '1.0.0'
4
+ VERSION = '1.1.0'
5
5
  end
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kudzu-adapter-active_record
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yoshikazu Kaneta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-20 00:00:00.000000000 Z
11
+ date: 2018-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: kudzu
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.0'
19
+ version: '1.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.0'
26
+ version: '1.1'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: activerecord
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,34 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: mysql2
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pg
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
83
111
  - !ruby/object:Gem::Dependency
84
112
  name: rspec-rails
85
113
  requirement: !ruby/object:Gem::Requirement
@@ -160,16 +188,17 @@ files:
160
188
  - README.md
161
189
  - Rakefile
162
190
  - lib/generators/kudzu/adapter/active_record/migration_generator.rb
163
- - lib/generators/kudzu/adapter/active_record/templates/create_kudzu_contents.rb.erb
191
+ - lib/generators/kudzu/adapter/active_record/templates/create_kudzu_chunks.rb.erb
164
192
  - lib/generators/kudzu/adapter/active_record/templates/create_kudzu_links.rb.erb
165
193
  - lib/generators/kudzu/adapter/active_record/templates/create_kudzu_pages.rb.erb
166
194
  - lib/kudzu/adapter/active_record.rb
167
195
  - lib/kudzu/adapter/active_record/all.rb
168
196
  - lib/kudzu/adapter/active_record/frontier.rb
169
197
  - lib/kudzu/adapter/active_record/model/base.rb
170
- - lib/kudzu/adapter/active_record/model/content.rb
198
+ - lib/kudzu/adapter/active_record/model/chunk.rb
171
199
  - lib/kudzu/adapter/active_record/model/link.rb
172
200
  - lib/kudzu/adapter/active_record/model/page.rb
201
+ - lib/kudzu/adapter/active_record/railtie.rb
173
202
  - lib/kudzu/adapter/active_record/repository.rb
174
203
  - lib/kudzu/adapter/active_record/version.rb
175
204
  homepage: https://github.com/kanety/kudzu-adapter-active_record
data/lib/generators/kudzu/adapter/active_record/templates/create_kudzu_contents.rb.erb REMOVED
@@ -1,9 +0,0 @@
1
- class CreateKudzuContents < ActiveRecord::Migration<%= @migration_version %>
2
- def change
3
- create_table :kudzu_contents do |t|
4
- t.references :page
5
- t.binary :data
6
- t.timestamps null: false
7
- end
8
- end
9
- end
data/lib/kudzu/adapter/active_record/model/content.rb REMOVED
@@ -1,9 +0,0 @@
1
- module Kudzu
2
- module Adapter
3
- module ActiveRecord
4
- class Content < Base
5
- belongs_to :page
6
- end
7
- end
8
- end
9
- end