kudzu-adapter-active_record 1.0.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +2 -2
- data/lib/generators/kudzu/adapter/active_record/migration_generator.rb +1 -1
- data/lib/generators/kudzu/adapter/active_record/templates/create_kudzu_chunks.rb.erb +9 -0
- data/lib/generators/kudzu/adapter/active_record/templates/create_kudzu_pages.rb.erb +0 -2
- data/lib/kudzu/adapter/active_record/all.rb +1 -5
- data/lib/kudzu/adapter/active_record/frontier.rb +1 -1
- data/lib/kudzu/adapter/active_record/model/chunk.rb +13 -0
- data/lib/kudzu/adapter/active_record/model/link.rb +3 -1
- data/lib/kudzu/adapter/active_record/model/page.rb +8 -2
- data/lib/kudzu/adapter/active_record/railtie.rb +18 -0
- data/lib/kudzu/adapter/active_record/repository.rb +30 -5
- data/lib/kudzu/adapter/active_record/version.rb +1 -1
- data/lib/kudzu/adapter/active_record.rb +17 -8
- metadata +17 -17
- data/lib/generators/kudzu/adapter/active_record/templates/create_kudzu_contents.rb.erb +0 -9
- data/lib/kudzu/adapter/active_record/model/content.rb +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6070cb29d79bb57e5dd856f975ad58ae27d04ec1d5155a8ff93c65530ea3032c
|
4
|
+
data.tar.gz: a4f62c4f71e36dae0d5d9061443a606e54a3954220d0f94b059b9e6d38dd8978
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ebf5a08ea9fc737fd342b4eadf443c01caafb0e14a43f78a60aac3cd0dc8a269d63635aee857fa59674ab20b7a41a464a486876e8b5a19c730e328af68a1c31
|
7
|
+
data.tar.gz: 2257f8e31654c5512a1b217b5edf9a657440ec73b2b909ac67b717b9cc7c3ebbf81166e7921c1536c3ca2f00c77601a3d8951a18a9789b36fc4213b5cc6592ac
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ ActiveRecord adapter for kudzu crawler.
|
|
4
4
|
|
5
5
|
## Dependencies
|
6
6
|
|
7
|
-
* kudzu 1.
|
7
|
+
* kudzu 1.1+
|
8
8
|
* activerecord 5.0+
|
9
9
|
|
10
10
|
## Installation
|
@@ -30,7 +30,7 @@ Migrate into your application:
|
|
30
30
|
This migration creates following tables:
|
31
31
|
|
32
32
|
* kudzu_pages
|
33
|
-
* kudzu_contents
|
33
|
+
* kudzu_chunks
|
34
34
|
* kudzu_links
|
35
35
|
|
36
36
|
## Usage
|
@@ -9,7 +9,7 @@ module Kudzu
|
|
9
9
|
def create
|
10
10
|
@migration_version = migration_version
|
11
11
|
timestamp = Time.now.utc.strftime("%Y%m%d%H%M%S").to_i
|
12
|
-
["create_kudzu_pages", "create_kudzu_contents", "create_kudzu_links"].each_with_index do |filename, i|
|
12
|
+
["create_kudzu_pages", "create_kudzu_chunks", "create_kudzu_links"].each_with_index do |filename, i|
|
13
13
|
timestamp += i
|
14
14
|
template "#{filename}.rb.erb", "db/migrate/#{timestamp}_#{filename}.rb"
|
15
15
|
end
|
@@ -13,8 +13,6 @@ class CreateKudzuPages < ActiveRecord::Migration<%= @migration_version %>
|
|
13
13
|
t.text :redirect_from
|
14
14
|
t.datetime :fetched_at
|
15
15
|
t.datetime :revised_at
|
16
|
-
t.integer :revisit_interval
|
17
|
-
t.datetime :revisit_at
|
18
16
|
t.timestamps null: false
|
19
17
|
|
20
18
|
t.index :url, length: 32
|
@@ -1,10 +1,6 @@
|
|
1
1
|
require_relative 'model/base'
|
2
2
|
require_relative 'model/page'
|
3
|
-
require_relative 'model/content'
|
3
|
+
require_relative 'model/chunk'
|
4
4
|
require_relative 'model/link'
|
5
5
|
require_relative 'frontier'
|
6
6
|
require_relative 'repository'
|
7
|
-
|
8
|
-
Kudzu::Page = Kudzu::Adapter::ActiveRecord::Page
|
9
|
-
Kudzu::Link = Kudzu::Adapter::ActiveRecord::Link
|
10
|
-
Kudzu::Content = Kudzu::Adapter::ActiveRecord::Content
|
@@ -2,9 +2,9 @@ module Kudzu
|
|
2
2
|
module Adapter
|
3
3
|
module ActiveRecord
|
4
4
|
class Page < Base
|
5
|
-
include Kudzu::
|
5
|
+
include Kudzu::Model::Page
|
6
6
|
|
7
|
-
|
7
|
+
has_many :chunks, -> { order(id: :asc) }, dependent: :delete_all
|
8
8
|
|
9
9
|
def response_header
|
10
10
|
if response_header_column_is_text?
|
@@ -22,6 +22,10 @@ module Kudzu
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
+
def data
|
26
|
+
chunks.pluck(:data).join
|
27
|
+
end
|
28
|
+
|
25
29
|
private
|
26
30
|
|
27
31
|
def response_header_column_is_text?
|
@@ -31,4 +35,6 @@ module Kudzu
|
|
31
35
|
end
|
32
36
|
end
|
33
37
|
end
|
38
|
+
|
39
|
+
Page = Adapter::ActiveRecord::Page
|
34
40
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Kudzu
|
2
|
+
module Adapter
|
3
|
+
module ActiveRecord
|
4
|
+
class Railtie < Rails::Railtie
|
5
|
+
ActiveSupport.on_load :active_record do
|
6
|
+
require_relative 'all'
|
7
|
+
end
|
8
|
+
|
9
|
+
config.after_initialize do
|
10
|
+
Dir.glob(Rails.root + 'app/decorators/kudzu/**/*_decorator*.rb').each do |c|
|
11
|
+
require_dependency(c)
|
12
|
+
end
|
13
|
+
Kudzu.logger = Rails.logger
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -3,20 +3,45 @@ module Kudzu
|
|
3
3
|
module ActiveRecord
|
4
4
|
class Repository
|
5
5
|
def find_by_url(url)
|
6
|
-
Page.
|
6
|
+
Page.find_or_initialize_by(url: url)
|
7
7
|
end
|
8
8
|
|
9
9
|
def register(page)
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
ActiveRecord::Base.transaction do
|
11
|
+
page.save
|
12
|
+
save_chunks(page) if page.body
|
13
13
|
end
|
14
|
-
page.save
|
15
14
|
end
|
16
15
|
|
17
16
|
def delete(page)
|
18
17
|
page.destroy if page
|
19
18
|
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def save_chunks(page)
|
23
|
+
chunk_num = 0
|
24
|
+
each_chunk(page.body, ActiveRecord.chunk_size) do |chunked, i|
|
25
|
+
chunk = page.chunks.select_without_data.offset(i).limit(1).first_or_initialize
|
26
|
+
chunk.data = chunked
|
27
|
+
chunk.save
|
28
|
+
chunk_num = i + 1
|
29
|
+
end
|
30
|
+
|
31
|
+
if page.chunks.count - chunk_num > 0
|
32
|
+
page.chunks.select_without_data.offset(chunk_num).each(&:delete)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def each_chunk(body, size)
|
37
|
+
pos = 0
|
38
|
+
i = 0
|
39
|
+
while (chunked = body.byteslice(pos, size))
|
40
|
+
yield chunked, i
|
41
|
+
pos += size
|
42
|
+
i += 1
|
43
|
+
end
|
44
|
+
end
|
20
45
|
end
|
21
46
|
end
|
22
47
|
end
|
@@ -1,19 +1,28 @@
|
|
1
1
|
require 'activerecord-import'
|
2
2
|
require 'kudzu'
|
3
3
|
|
4
|
+
if defined? Rails
|
5
|
+
require_relative 'active_record/railtie'
|
6
|
+
else
|
7
|
+
require_relative 'active_record/all'
|
8
|
+
end
|
9
|
+
|
4
10
|
module Kudzu
|
5
11
|
module Adapter
|
6
12
|
module ActiveRecord
|
13
|
+
class << self
|
14
|
+
@@chunk_size = 5*(1024**2)
|
15
|
+
|
16
|
+
def chunk_size
|
17
|
+
@@chunk_size
|
18
|
+
end
|
19
|
+
|
20
|
+
def chunk_size=(val)
|
21
|
+
@@chunk_size = val
|
22
|
+
end
|
23
|
+
end
|
7
24
|
end
|
8
25
|
end
|
9
26
|
end
|
10
27
|
|
11
28
|
Kudzu.adapter = Kudzu::Adapter::ActiveRecord
|
12
|
-
|
13
|
-
if defined? Railtie
|
14
|
-
ActiveSupport.on_load :active_record do
|
15
|
-
require_relative 'active_record/all'
|
16
|
-
end
|
17
|
-
else
|
18
|
-
require_relative 'active_record/all'
|
19
|
-
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kudzu-adapter-active_record
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yoshikazu Kaneta
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kudzu
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: activerecord
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: webrick
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -81,7 +81,7 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: sqlite3
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - ">="
|
@@ -95,7 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: mysql2
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: pg
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - ">="
|
@@ -123,7 +123,7 @@ dependencies:
|
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: rspec-rails
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - ">="
|
@@ -137,7 +137,7 @@ dependencies:
|
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
|
-
name:
|
140
|
+
name: simplecov
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
142
142
|
requirements:
|
143
143
|
- - ">="
|
@@ -160,23 +160,24 @@ files:
|
|
160
160
|
- README.md
|
161
161
|
- Rakefile
|
162
162
|
- lib/generators/kudzu/adapter/active_record/migration_generator.rb
|
163
|
-
- lib/generators/kudzu/adapter/active_record/templates/create_kudzu_contents.rb.erb
|
163
|
+
- lib/generators/kudzu/adapter/active_record/templates/create_kudzu_chunks.rb.erb
|
164
164
|
- lib/generators/kudzu/adapter/active_record/templates/create_kudzu_links.rb.erb
|
165
165
|
- lib/generators/kudzu/adapter/active_record/templates/create_kudzu_pages.rb.erb
|
166
166
|
- lib/kudzu/adapter/active_record.rb
|
167
167
|
- lib/kudzu/adapter/active_record/all.rb
|
168
168
|
- lib/kudzu/adapter/active_record/frontier.rb
|
169
169
|
- lib/kudzu/adapter/active_record/model/base.rb
|
170
|
-
- lib/kudzu/adapter/active_record/model/content.rb
|
170
|
+
- lib/kudzu/adapter/active_record/model/chunk.rb
|
171
171
|
- lib/kudzu/adapter/active_record/model/link.rb
|
172
172
|
- lib/kudzu/adapter/active_record/model/page.rb
|
173
|
+
- lib/kudzu/adapter/active_record/railtie.rb
|
173
174
|
- lib/kudzu/adapter/active_record/repository.rb
|
174
175
|
- lib/kudzu/adapter/active_record/version.rb
|
175
176
|
homepage: https://github.com/kanety/kudzu-adapter-active_record
|
176
177
|
licenses:
|
177
178
|
- MIT
|
178
179
|
metadata: {}
|
179
|
-
post_install_message:
|
180
|
+
post_install_message:
|
180
181
|
rdoc_options: []
|
181
182
|
require_paths:
|
182
183
|
- lib
|
@@ -191,9 +192,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
191
192
|
- !ruby/object:Gem::Version
|
192
193
|
version: '0'
|
193
194
|
requirements: []
|
194
|
-
|
195
|
-
|
196
|
-
signing_key:
|
195
|
+
rubygems_version: 3.3.3
|
196
|
+
signing_key:
|
197
197
|
specification_version: 4
|
198
198
|
summary: ActiveRecord adapter for kudzu crawler
|
199
199
|
test_files: []
|