maiha-dm-ys 0.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -33,7 +33,7 @@ AUTHOR = "maiha"
33
33
  EMAIL = "maiha@wota.jp"
34
34
  HOMEPAGE = "http://github.com/maiha/dm-ys"
35
35
  SUMMARY = "a DataMapper extension that uses html table as its schema and data powerfully like YunkerStar"
36
- GEM_VERSION = "0.3"
36
+ GEM_VERSION = "0.3.1"
37
37
 
38
38
  spec = Gem::Specification.new do |s|
39
39
  # s.rubyforge_project = 'merb'
@@ -4,6 +4,7 @@ require 'dm-core'
4
4
  require 'dsl_accessor'
5
5
 
6
6
  __DIR__ = File.dirname(__FILE__)
7
+ require __DIR__ + '/dm-ys/config'
7
8
  require __DIR__ + '/dm-ys/base'
8
9
  require __DIR__ + '/dm-ys/cached_accessor'
9
10
  require __DIR__ + '/dm-ys/memory_repository'
@@ -0,0 +1,30 @@
1
module DataMapper
  module YunkerStar
    # Option holder for a YunkerStar model.
    #
    # Options are kept in a Hash and read/written with [] and []=.
    # Keys given defaults here:
    #   :max_pages - 100
    #   :uniq      - true (the predicates below also recognise :page,
    #                :entry and false)
    class Config
      # Returns a fresh Hash of default option values.
      def self.default
        {:max_pages=>100, :uniq=>true}
      end

      # options - Hash of overrides, or anything else (e.g. nil) to use the
      #           defaults only.
      #
      # Overrides are merged on top of the defaults, so a partial Hash such
      # as {:uniq => :page} still carries the default :max_pages. The
      # caller's Hash is copied by the merge and is never mutated by []=.
      def initialize(options = nil)
        overrides = options.is_a?(Hash) ? options : {}
        @options = self.class.default.merge(overrides)
      end

      # Returns the value stored under key (nil when absent).
      def [](key)
        @options[key]
      end

      # Stores val under key.
      def []=(key, val)
        @options[key] = val
      end

      # True for any truthy :uniq value (true, :page, :entry, ...).
      def uniq_page?
        !!self[:uniq]
      end

      # True only when :uniq is exactly true or :entry.
      def uniq_entry?
        self[:uniq] == true || self[:uniq] == :entry
      end
    end
  end
end
@@ -18,10 +18,13 @@ module DataMapper
18
18
  dsl_accessor :table
19
19
  dsl_accessor :tbody
20
20
  dsl_accessor :thead
21
+ dsl_accessor :ys, :default=>proc{|*a| DataMapper::YunkerStar::Config.new}
21
22
  property :id, DataMapper::Types::Serial
22
23
  end
23
24
  end
24
25
 
26
+ class MaxPagesOverflow < RuntimeError; end
27
+
25
28
  module ClassMethods
26
29
  def proxy
27
30
  @proxy ||= Scraper.load(self)
@@ -7,6 +7,7 @@ module DataMapper
7
7
  require 'nkf'
8
8
  require 'open-uri'
9
9
  require 'hpricot'
10
+ require 'digest/sha1'
10
11
 
11
12
  module Scraper
12
13
  class TableNotFound < RuntimeError; end
@@ -59,6 +60,10 @@ module DataMapper
59
60
  define_method(method) {raise NotImplementedError, method.to_s}
60
61
  end
61
62
 
63
# Number of entries this scraper yields.
def count
  entries.length
end
66
+
62
67
  def uri
63
68
  @uri || @model.uri.to_s.chomp('*')
64
69
  end
@@ -99,11 +104,16 @@ module DataMapper
99
104
  attrs = [
100
105
  [ :html, "#{html.size}bytes" ],
101
106
  [ :names, names ],
102
- [ :entries, entries.size ],
107
+ [ :entries, count ],
103
108
  ]
104
109
  "#<#{self.class.name} #{attrs.map { |(k,v)| "@#{k}=#{v.inspect}" } * ' '}>"
105
110
  end
106
111
 
112
# SHA1 hex digest of every cell in this page's entries, flattened and
# joined with tabs. Pages whose cell text is identical share the same value.
def page_hash
  cells = entries.flatten
  Digest::SHA1.hexdigest(cells.join("\t"))
end
116
+
107
117
  cached_accessor do
108
118
  doc {Hpricot(@html)}
109
119
  table {specified(:table) or guess_table}
@@ -112,7 +122,6 @@ module DataMapper
112
122
  names {labels.map{|i| label2name(i)}}
113
123
  labels {thead.search("> tr").first.search("> td|th").map{|i|strip_tags(i.inner_html)}}
114
124
  entries {tbody.search("> tr").map{|tr| tr.search("> td").map{|i|strip_tags(i.inner_html)}}.delete_if{|i|i.blank?}}
115
- count {entries.size}
116
125
  end
117
126
 
118
127
  private
@@ -178,10 +187,6 @@ module DataMapper
178
187
  @pages ||= execute
179
188
  end
180
189
 
181
- def count
182
- pages.map(&:count).inject(0){|i,v| i+v}
183
- end
184
-
185
190
  def names
186
191
  pages.first.names
187
192
  end
@@ -191,33 +196,66 @@ module DataMapper
191
196
  end
192
197
 
193
198
  def entries
194
- pages.inject([]){|a,p| a+p.entries}
199
+ records = []
200
+ digests = Set.new
201
+ pages.each do |page|
202
+ page.entries.each do |entry|
203
+ if config.uniq_entry?
204
+ sha1 = Digest::SHA1.hexdigest(entry.join("\t"))
205
+ next if digests.include?(sha1)
206
+ digests << sha1
207
+ end
208
+ records << entry
209
+ end
210
+ end
211
+ return records
195
212
  end
196
213
 
197
214
  private
198
215
  def execute
199
216
  visit(uri)
200
- valid_pages
217
+ uniq_pages
218
+ end
219
+
220
# The configuration object exposed by the model through its +ys+ accessor.
def config
  return @model.ys
end
202
223
 
203
- def valid_pages
204
- loaded_pages.values.compact
224
# Loaded pages with later pages whose page_hash repeats an earlier one
# removed. When config.uniq_page? is false, loaded_pages is returned as is.
def uniq_pages
  return loaded_pages unless config.uniq_page?

  seen = Set.new
  # Set#add? returns nil for an element that is already present.
  loaded_pages.select { |page| !seen.add?(page.page_hash).nil? }
end
206
238
 
207
239
  def loaded_pages
208
- @loaded_pages ||= {} # url => page object
240
+ loaded_pages_hash.values.compact
241
+ end
242
+
243
# Lazily created Hash of url => page object.
def loaded_pages_hash
  @loaded_pages_hash = {} unless @loaded_pages_hash
  @loaded_pages_hash
end
210
246
 
211
- def visit(uri)
212
- return if loaded_pages[uri]
247
# Loads +uri+ and, recursively, every page reachable through its
# pagination links.
#
# uri             - address of the page to load.
# runtime_options - Hash shared by the whole recursion; :count is the
#                   number of pages loaded so far.
#
# Raises Proxy::MaxPagesOverflow when loading this page would exceed
# config[:max_pages]. A nil :max_pages means no limit.
def visit(uri, runtime_options = {:count => 0})
  # key? rather than []: URLs stored with a nil marker were visited too and
  # must not be loaded again (a rejected page linking to itself would
  # otherwise be refetched until the page limit is hit).
  return if loaded_pages_hash.key?(uri)

  limit = config[:max_pages]
  fetched = (runtime_options[:count] += 1)
  raise Proxy::MaxPagesOverflow if limit && fetched > limit

  page = Page.new(@model, uri)
  base = loaded_pages.first
  # The first page is always kept; later pages only when their names match it.
  loaded_pages_hash[uri] = (!base || base.names == page.names) ? page : nil
  page.pagination_links.each { |link| visit(link, runtime_options) }
end
222
260
  end
223
261
 
@@ -66,4 +66,56 @@ describe DataMapper::YunkerStar::Scraper::Composite do
66
66
  @scraper.entries.should == (Plugin1.entries + Plugin2.entries)
67
67
  end
68
68
  end
69
+
70
# Composite scraping over the sorted fixtures: the same two pages are
# reachable through several sort links.
describe "SortedPlugin" do
  it "should return 6 pages" do
    SortedPlugin.proxy.pages.size.should == 6
  end

  describe "#entries" do
    it "should return duplicate entries" do
      SortedPlugin.entries.sort.should == ((SortedPlugin1.entries + SortedPlugin2.entries)*3).sort
    end
  end

  describe "#count" do
    it "should count duplicate entries" do
      SortedPlugin.count.should == (SortedPlugin1.count + SortedPlugin2.count)*3
    end
  end
end

describe "SortedPlugin with uniq page option" do
  it "should return 2 pages" do
    SortedPluginWithUniqPage.proxy.pages.size.should == 2
  end

  describe "#entries" do
    it "should return same value as Plugin" do
      SortedPluginWithUniqPage.entries.should == (SortedPlugin1.entries + SortedPlugin2.entries)
    end
  end

  describe "#count" do
    it "should return same value as Plugin" do
      SortedPluginWithUniqPage.count.should == (SortedPlugin1.count + SortedPlugin2.count)
    end
  end
end

describe "UniqPlugin" do
  it "should return 2 pages" do
    UniqPlugin.proxy.pages.size.should == 2
  end

  describe "#count" do
    # The two pages hold 20 and 4 entries but the composite reports 22.
    it "should count entries without duplicates" do
      UniqPlugin1.count.should == 20
      UniqPlugin2.count.should == 4
      UniqPlugin.count.should  == 22
    end
  end
end
119
+
120
+
69
121
  end
@@ -0,0 +1,68 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
# Examples for DataMapper::YunkerStar::Config: default values and the
# uniq_page? / uniq_entry? predicates for each supported :uniq setting.
describe DataMapper::YunkerStar::Config do
  before(:each) do
    @config = DataMapper::YunkerStar::Config.new
  end

  it "should provide []" do
    @config.should respond_to(:[])
  end

  describe "[:uniq]" do
    it "should have true as default value" do
      @config[:uniq].should == true
    end
  end

  it "should provide #uniq_page?" do
    @config.should respond_to(:uniq_page?)
  end

  describe "#uniq_page?" do
    it "should return true as default value" do
      @config.uniq_page?.should == true
    end

    it "should return true when :uniq is set to :page" do
      config = DataMapper::YunkerStar::Config.new(:uniq=>:page)
      config.uniq_page?.should == true
    end

    it "should return true when :uniq is set to :entry" do
      config = DataMapper::YunkerStar::Config.new(:uniq=>:entry)
      config.uniq_page?.should == true
    end

    it "should return false when :uniq is set to false" do
      config = DataMapper::YunkerStar::Config.new(:uniq=>false)
      config.uniq_page?.should == false
    end
  end

  it "should provide #uniq_entry?" do
    @config.should respond_to(:uniq_entry?)
  end

  describe "#uniq_entry?" do
    it "should return true as default value" do
      @config.uniq_entry?.should == true
    end

    it "should return false when :uniq is set to :page" do
      config = DataMapper::YunkerStar::Config.new(:uniq=>:page)
      config.uniq_entry?.should == false
    end

    it "should return true when :uniq is set to :entry" do
      config = DataMapper::YunkerStar::Config.new(:uniq=>:entry)
      config.uniq_entry?.should == true
    end

    it "should return false when :uniq is set to false" do
      config = DataMapper::YunkerStar::Config.new(:uniq=>false)
      config.uniq_entry?.should == false
    end

  end
end
@@ -0,0 +1,262 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-us" lang="en-us">
3
+ <head>
4
+ <title>Merbist Plugins</title>
5
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
6
+ <meta name="verify-v1"
7
+ content="QqJ1Kmvs51kF+1Sn+7JUqkXTXbnmLVKzFctoGLRDLE8=" />
8
+ <link rel="stylesheet" href="/stylesheets/master.css" type="text/css"
9
+ media="screen" charset="utf-8" />
10
+ <script src="/javascripts/jquery.js" type="text/javascript"></script>
11
+ <link rel="alternate" type="application/rss+xml" title="Atom" href="/plugins.atom"></link>
12
+
13
+ </head>
14
+ <body>
15
+ <div id="base">
16
+ <div id="header">
17
+ <div id="navi">
18
+ <a href="/">Top</a>
19
+ <a href="/plugins">Plugins</a>
20
+ <a href="/talks">Talks</a>
21
+ <a href="/sites">Sites</a>
22
+ <a href="/users">Users</a>
23
+ </div>
24
+ <div id="menu">
25
+ <a href="/login">Login</a>
26
+ <a href="/users/new">Sing up</a>
27
+ </div>
28
+ <div class="clear"><!----></div>
29
+
30
+ </div>
31
+ <div id="body">
32
+ <div class="command">
33
+ <a href="/plugins">Index</a>
34
+ </div>
35
+
36
+
37
+ <h1>Plugin List</h1>
38
+
39
+ <table class="plugin-list" cellspacing="1" border="0">
40
+ <thead>
41
+ <tr>
42
+ <th>
43
+ <a href="/plugins/sorted?page=1&sort=name1">▲</a>
44
+ Name
45
+ <a href="/plugins/sorted?page=1&sort=name2">▼</a
46
+ </th>
47
+ <th>Repos</th>
48
+ <th>Registered by</th>
49
+ <th>Description</th>
50
+ <th></th>
51
+ </tr>
52
+ </thead>
53
+ <tbody>
54
+ <tr class="even">
55
+ <td><a href="/plugins/36">eventmachine-0.12.5</a></td>
56
+ <td>&dagger;</td>
57
+ <td><a href="/users/1">genki</a></td>
58
+ <td>EventMachine: fast, simple event-processing library for Ruby programs</td>
59
+ <td>
60
+ </td>
61
+ </tr>
62
+ <tr class="">
63
+ <td><a href="/plugins/35">dm-last-0.0.1</a></td>
64
+ <td>&dagger;</td>
65
+ <td><a href="/users/1">genki</a></td>
66
+ <td>A DataMapper plugin that offers a short-hand for Model.all.last as Model.last</td>
67
+ <td>
68
+ </td>
69
+ </tr>
70
+ <tr class="even">
71
+ <td><a href="/plugins/34">sweet_merb_fixtures-0.0.4</a></td>
72
+ <td>&dagger;</td>
73
+ <td><a href="/users/1">genki</a></td>
74
+ <td>The way to store records from YAML file.</td>
75
+ <td>
76
+ </td>
77
+ </tr>
78
+ <tr class="">
79
+ <td><a href="/plugins/33">merb-slices-search-path-fix-0.0.1</a></td>
80
+ <td>&dagger;</td>
81
+ <td><a href="/users/1">genki</a></td>
82
+ <td>A merb plugin that fixes </td>
83
+ <td>
84
+ </td>
85
+ </tr>
86
+ <tr class="even">
87
+ <td><a href="/plugins/32">rubyforge-1.0.2</a></td>
88
+ <td>&dagger;</td>
89
+ <td><a href="/users/1">genki</a></td>
90
+ <td>A script which automates a limited set of rubyforge operations.
91
+ </td>
92
+ <td>
93
+ </td>
94
+ </tr>
95
+ <tr class="">
96
+ <td><a href="/plugins/31">ruby-git</a></td>
97
+ <td>&dagger;</td>
98
+ <td><a href="/users/1">genki</a></td>
99
+ <td>Ruby/Git is a Ruby library that can be used to create, read and manipulate Git repositories by wrapping system calls to the git binary.</td>
100
+ <td>
101
+ </td>
102
+ </tr>
103
+ <tr class="even">
104
+ <td><a href="/plugins/30">fastthread-1.0.1</a></td>
105
+ <td>&dagger;</td>
106
+ <td><a href="/users/1">genki</a></td>
107
+ <td>fastthread that is compatible to ruby191</td>
108
+ <td>
109
+ </td>
110
+ </tr>
111
+ <tr class="">
112
+ <td><a href="/plugins/29">mime-types-1.15.1</a></td>
113
+ <td>&dagger;</td>
114
+ <td><a href="/users/1">genki</a></td>
115
+ <td>This library allows for the identification of a file’s likely MIME content type.</td>
116
+ <td>
117
+ </td>
118
+ </tr>
119
+ <tr class="even">
120
+ <td><a href="/plugins/28">memcached-0.14</a></td>
121
+ <td>&dagger;</td>
122
+ <td><a href="/users/1">genki</a></td>
123
+ <td>A Ruby interface to the libmemcached C client</td>
124
+ <td>
125
+ </td>
126
+ </tr>
127
+ <tr class="">
128
+ <td><a href="/plugins/27">gem_plugin-0.2.3</a></td>
129
+ <td>&dagger;</td>
130
+ <td><a href="/users/1">genki</a></td>
131
+ <td>Gem Based Plugin System</td>
132
+ <td>
133
+ </td>
134
+ </tr>
135
+ <tr class="even">
136
+ <td><a href="/plugins/26">cgi_multipart_eof_fix-2.5.0</a></td>
137
+ <td>&dagger;</td>
138
+ <td><a href="/users/1">genki</a></td>
139
+ <td>cgi_multipart_eof_fix on github</td>
140
+ <td>
141
+ </td>
142
+ </tr>
143
+ <tr class="">
144
+ <td><a href="/plugins/25">mongrel-1.1.2</a></td>
145
+ <td>&dagger;</td>
146
+ <td><a href="/users/1">genki</a></td>
147
+ <td>Mongrel</td>
148
+ <td>
149
+ </td>
150
+ </tr>
151
+ <tr class="even">
152
+ <td><a href="/plugins/24">hpricot-0.6.207</a></td>
153
+ <td>&dagger;</td>
154
+ <td><a href="/users/1">genki</a></td>
155
+ <td>A swift, liberal HTML parser with a fantastic library</td>
156
+ <td>
157
+ </td>
158
+ </tr>
159
+ <tr class="">
160
+ <td><a href="/plugins/23">methopara-0.3.0</a></td>
161
+ <td>&dagger;</td>
162
+ <td><a href="/users/1">genki</a></td>
163
+ <td>Method#parameters for ruby-1.9.1</td>
164
+ <td>
165
+ </td>
166
+ </tr>
167
+ <tr class="even">
168
+ <td><a href="/plugins/22">irb_rocket-0.1.3</a></td>
169
+ <td>&dagger;</td>
170
+ <td><a href="/users/1">genki</a></td>
171
+ <td>irb plugin that makes irb #=&gt; rocket</td>
172
+ <td>
173
+ </td>
174
+ </tr>
175
+ <tr class="">
176
+ <td><a href="/plugins/21">ruby-terminfo-0.1.1</a></td>
177
+ <td>&dagger;</td>
178
+ <td><a href="/users/1">genki</a></td>
179
+ <td>ruby-terminfo is a terminfo binding for Ruby</td>
180
+ <td>
181
+ </td>
182
+ </tr>
183
+ <tr class="even">
184
+ <td><a href="/plugins/20">extlib-present-0.0.2</a></td>
185
+ <td>&dagger;</td>
186
+ <td><a href="/users/1">genki</a></td>
187
+ <td>A plugin that provides Object#present?</td>
188
+ <td>
189
+ </td>
190
+ </tr>
191
+ <tr class="">
192
+ <td><a href="/plugins/19">merb_render_filter-0.0.2</a></td>
193
+ <td>&dagger;</td>
194
+ <td><a href="/users/1">genki</a></td>
195
+ <td>A plugin that provides {before|after}_render filters for Controller.</td>
196
+ <td>
197
+ </td>
198
+ </tr>
199
+ <tr class="even">
200
+ <td><a href="/plugins/18">bcrypt-ruby-2.0.3</a></td>
201
+ <td>&dagger;</td>
202
+ <td><a href="/users/1">genki</a></td>
203
+ <td>bcrypt-ruby is a Ruby binding for the OpenBSD bcrypt() password hashing algorithm, allowing you to easily store a secure hash of your users' passwords.
204
+ </td>
205
+ <td>
206
+ </td>
207
+ </tr>
208
+ <tr class="">
209
+ <td><a href="/plugins/17">json-1.1.4.1</a></td>
210
+ <td>&dagger;</td>
211
+ <td><a href="/users/1">genki</a></td>
212
+ <td>This library can parse JSON texts and generate them from ruby data structures. Install the extension variant (in C) with &quot;gem install json&quot; or install the pure Ruby variant with &quot;gem install json_pure&quot;.</td>
213
+ <td>
214
+ </td>
215
+ </tr>
216
+ </tbody>
217
+ </table>
218
+
219
+ <div class="pagination"><span class="prev disabled">&laquo; Prev</span>
220
+ <span class="current disabled">1</span>
221
+ <span class="disabled"><a href="/plugins/sorted?page=2">2</a></span>
222
+ <a class="next" rel="next" href="/plugins/sorted?page=2">Next &raquo;</a></div>
223
+
224
+ <div class="footnote">
225
+ &dagger; You can install these gems by
226
+ <code>
227
+ gem install gem-name --source http://merbi.st
228
+ </code>
229
+ </div>
230
+
231
+
232
+ </div>
233
+ <div id="footer">
234
+ <div id="footer">
235
+ 2008
236
+ <a href="http://wota.jp/ac/">maiha</a>,
237
+ <a href="http://d.jong.gr.jp/shachi">shachi</a>
238
+ and
239
+ <a href="http://blog.s21g.com/genki">genki</a>
240
+ (<a href="http://www.s21g.com/">s21g LLC</a>).
241
+ <span class="powered-by">
242
+ Powered by
243
+ Merb-1.0.9
244
+ (Ruby-1.9.1)
245
+ </span>
246
+ </div>
247
+
248
+ </div>
249
+ </div>
250
+
251
+ <script type="text/javascript">
252
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
253
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
254
+ </script>
255
+ <script type="text/javascript">
256
+ try {
257
+ var pageTracker = _gat._getTracker("UA-2733799-11");
258
+ pageTracker._trackPageview();
259
+ } catch(err) {}</script>
260
+
261
+ </body>
262
+ </html>