maiha-dm-ys 0.4 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -64,8 +64,31 @@ Append "*" to uri if you want pagination mode.
64
64
  => 36
65
65
 
66
66
 
67
+ Raw Element
68
+ ===========
69
+
70
+ The "element_for" method returns the raw scraped elements, which are currently Hpricot elements.
71
+
72
+ >> Plugin.names #=> ["Name", "Repos", "Registeredby", "Description", "col_1"]
73
+ >> record = Plugin.first #=> #<Plugin id=1 Name="eventmachine-0.12.5" Repos=...>
74
+ >> record.Name #=> "eventmachine-0.12.5"
75
+ >> record.element_for("Name") #=> {elem td {elem a href"/plugins/36" "eventmachine-0.12.5" a} td}
76
+ >> record.element_for("Name").class #=> Hpricot::Elem
77
+
78
+ "link_for" is syntactic sugar for extracting the href attribute of the first link in its element.
79
+
80
+ >> record.link_for("Name") #=> "http://merbi.st/plugins/36"
81
+ >> record.link_for("Registeredby") #=> "http://merbi.st/users/1"
82
+
83
+ Furthermore, the :only_path option controls whether the fully qualified URL or only the path is returned.
84
+
85
+ >> Plugin.ys[:only_path] = true
86
+ >> record.link_for("Name") #=> "/plugins/36"
87
+ >> record.link_for("Registeredby") #=> "/users/1"
88
+
89
+
67
90
  TODO
68
91
  ====
69
- * store link information for each columns
92
+ * Feel free to request what you want! :)
70
93
 
71
94
  Copyright (c) 2008 maiha@wota.jp, released under the MIT license
data/Rakefile CHANGED
@@ -33,7 +33,7 @@ AUTHOR = "maiha"
33
33
  EMAIL = "maiha@wota.jp"
34
34
  HOMEPAGE = "http://github.com/maiha/dm-ys"
35
35
  SUMMARY = "a DataMapper extension that uses html table as its schema and data powerfully like YunkerStar"
36
- GEM_VERSION = "0.4"
36
+ GEM_VERSION = "0.4.1"
37
37
 
38
38
  spec = Gem::Specification.new do |s|
39
39
  # s.rubyforge_project = 'merb'
@@ -2,7 +2,7 @@ module DataMapper
2
2
  module YS
3
3
  class Config
4
4
  def self.default
5
- {:max_pages=>100, :uniq=>true}
5
+ {:max_pages=>100, :uniq=>true, :only_path=>false}
6
6
  end
7
7
 
8
8
  def initialize(options = nil)
@@ -25,6 +25,10 @@ module DataMapper
25
25
  def uniq_entry?
26
26
  self[:uniq] == true or self[:uniq] == :entry
27
27
  end
28
+
29
+ def only_path?
30
+ !!self[:only_path]
31
+ end
28
32
  end
29
33
  end
30
34
  end
@@ -0,0 +1,47 @@
1
+ module DataMapper
2
+ module YS
3
+
4
+ # ==== Example
5
+ #
6
+ # class Foo
7
+ # include DataMapper::YS
8
+ # uri ...
9
+ #
10
+ # # <tr><th>name</th>...
11
+ # # <tr><td><a href="/plugins/36">dm-ys</a></td>...
12
+ #
13
+ # foo = Foo.first
14
+ # foo.link_for(:name) # => "/plugins/36"
15
+
16
+ module ElementProperty
17
+ def link_for(key)
18
+ links_for(key).first
19
+ end
20
+
21
+ def links_for(key)
22
+ key = normalized_property_for(key)
23
+ (@links[key.to_s] || []).map do |url|
24
+ if self.class.ys.only_path?
25
+ url
26
+ else
27
+ (self.class.proxy.base_uri + url).to_s
28
+ end
29
+ end
30
+ end
31
+
32
+ def element_for(key)
33
+ key = normalized_property_for(key)
34
+ @elements[key.to_s]
35
+ end
36
+
37
+ def links=(value)
38
+ @links = value
39
+ end
40
+
41
+ def elements=(value)
42
+ @elements = value
43
+ end
44
+ end
45
+
46
+ end
47
+ end
@@ -68,6 +68,10 @@ module DataMapper
68
68
  @uri || @model.uri.to_s.chomp('*')
69
69
  end
70
70
 
71
+ def base_uri
72
+ URI.parse(uri.split('?').first)
73
+ end
74
+
71
75
  def register_properties!
72
76
  names.each do |name|
73
77
  type = String # TODO
@@ -95,7 +99,7 @@ module DataMapper
95
99
  end
96
100
 
97
101
  def pagination_links
98
- base = URI.parse(uri.split('?').first)
102
+ base = base_uri
99
103
  urls = (doc / "a").map{|i| i[:href] =~ /^http/ ? i[:href] : (base+i[:href]).to_s}.uniq
100
104
  urls.select{|url| /^#{Regexp.escape(base.to_s)}/ === url}
101
105
  end
@@ -122,12 +126,17 @@ module DataMapper
122
126
  labels {thead.search("> tr").first.search("> td|th").map{|i|strip_tags(i.inner_html)}}
123
127
  records {
124
128
  tbody.search("> tr").map do |tr|
125
- elems = tr.search("> td")
126
- values = elems.map{|i|strip_tags(i.inner_html)}
129
+ elems = tr.search("> td")
130
+ next if elems.blank? # ignored because this should be TH columns
131
+
132
+ values = elems.map{|i|strip_tags(i.inner_html)}
133
+ elements = Hash[*names.zip(elems).flatten]
134
+
127
135
  record = @model.new(Hash[*names.zip(values).flatten])
128
- record.elements = Hash[*names.zip(elems).flatten]
136
+ record.elements = elements
137
+ record.links = names.inject({}){|h,name| h[name] = links_for(elements[name]); h}
129
138
  record
130
- end
139
+ end.compact
131
140
  }
132
141
  end
133
142
 
@@ -184,6 +193,17 @@ module DataMapper
184
193
  def strip_tags(html)
185
194
  html.gsub(/<.*?>/, '').strip
186
195
  end
196
+
197
+ def links_for(element)
198
+ case element
199
+ when Hpricot::Elem
200
+ return Array(element.search("a")).map{|i| i[:href]}
201
+ when Hpricot::Elements
202
+ return element.map{|e| links_for(e)}.flatten
203
+ else
204
+ return []
205
+ end
206
+ end
187
207
  end
188
208
 
189
209
  ######################################################################
@@ -0,0 +1,42 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ describe DataMapper::YS, "(a record)" do
4
+ class OnlyPath
5
+ include DataMapper::YS
6
+ uri "http://merbi.st/plugins/"
7
+ ys[:only_path] = true
8
+ end
9
+
10
+ before(:each) do
11
+ @record = Plugin.first
12
+ end
13
+
14
+ it "should provide #link_for" do
15
+ @record.should respond_to(:link_for)
16
+ end
17
+
18
+ describe "#link_for" do
19
+ it "should return first link if its element has href attributes" do
20
+ @record.link_for("Name").should == "http://merbi.st/plugins/36"
21
+ end
22
+
23
+ it "should return nil if its element has no href attributes" do
24
+ @record.link_for("Description").should == nil
25
+ end
26
+
27
+ it "should return only path link when :only_path is true" do
28
+ OnlyPath.first.link_for("Name").should == "/plugins/36"
29
+ end
30
+ end
31
+
32
+ it "should provide #element_for" do
33
+ @record.should respond_to(:element_for)
34
+ end
35
+
36
+ describe "#element_for" do
37
+ it "should return first link if its element has href attributes" do
38
+ @record.element_for("Name").to_s.should == "<td><a href=\"/plugins/36\">eventmachine-0.12.5</a></td>"
39
+ end
40
+ end
41
+
42
+ end
@@ -63,6 +63,15 @@ describe DataMapper::YS::Config do
63
63
  config = DataMapper::YS::Config.new(:uniq=>false)
64
64
  config.uniq_entry?.should == false
65
65
  end
66
+ end
67
+
68
+ it "should provide #only_path?" do
69
+ @config.should respond_to(:only_path?)
70
+ end
66
71
 
72
+ describe "[:only_path]" do
73
+ it "should has false as default value" do
74
+ @config[:only_path].should == false
75
+ end
67
76
  end
68
77
  end
@@ -0,0 +1,108 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-us" lang="en-us">
3
+ <head>
4
+ <title>Merbist Plugins</title>
5
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
6
+ <meta name="verify-v1"
7
+ content="QqJ1Kmvs51kF+1Sn+7JUqkXTXbnmLVKzFctoGLRDLE8=" />
8
+ <link rel="stylesheet" href="/stylesheets/master.css" type="text/css"
9
+ media="screen" charset="utf-8" />
10
+ <script src="/javascripts/jquery.js" type="text/javascript"></script>
11
+ <link rel="alternate" type="application/rss+xml" title="Atom" href="/plugins.atom"></link>
12
+
13
+ </head>
14
+ <body>
15
+ <div id="base">
16
+ <div id="header">
17
+ <div id="navi">
18
+ <a href="/">Top</a>
19
+ <a href="/plugins">Plugins</a>
20
+ <a href="/talks">Talks</a>
21
+ <a href="/sites">Sites</a>
22
+ <a href="/users">Users</a>
23
+ </div>
24
+ <div id="menu">
25
+ <a href="/login">Login</a>
26
+ <a href="/users/new">Sing up</a>
27
+ </div>
28
+ <div class="clear"><!----></div>
29
+
30
+ </div>
31
+ <div id="body">
32
+ <div class="command">
33
+ <a href="/plugins">Index</a>
34
+ </div>
35
+
36
+
37
+ <h1>Plugin List</h1>
38
+
39
+ <table class="plugin-list" cellspacing="1" border="0">
40
+ <tr>
41
+ <th>Name</th>
42
+ <th>Repos</th>
43
+ <th>Registered by</th>
44
+ <th>Description</th>
45
+ <th></th>
46
+ </tr>
47
+ <tr class="even">
48
+ <td><a href="/plugins/36">eventmachine-0.12.5</a></td>
49
+ <td>&dagger;</td>
50
+ <td><a href="/users/1">genki</a></td>
51
+ <td>EventMachine</td>
52
+ <td>
53
+ </td>
54
+ </tr>
55
+ <tr class="">
56
+ <td><a href="/plugins/35">dm-last-0.0.1</a></td>
57
+ <td>&dagger;</td>
58
+ <td><a href="/users/1">genki</a></td>
59
+ <td>Model.last</td>
60
+ <td>
61
+ </td>
62
+ </tr>
63
+ </table>
64
+
65
+ <div class="pagination"><span class="prev disabled">&laquo; Prev</span>
66
+ <span class="current disabled">1</span>
67
+ <span class="disabled"><a href="/plugins/uniq?page=2">2</a></span>
68
+ <a class="next" rel="next" href="/plugins/uniq?page=2">Next &raquo;</a></div>
69
+
70
+ <div class="footnote">
71
+ &dagger; You can install these gems by
72
+ <code>
73
+ gem install gem-name --source http://merbi.st
74
+ </code>
75
+ </div>
76
+
77
+
78
+ </div>
79
+ <div id="footer">
80
+ <div id="footer">
81
+ 2008
82
+ <a href="http://wota.jp/ac/">maiha</a>,
83
+ <a href="http://d.jong.gr.jp/shachi">shachi</a>
84
+ and
85
+ <a href="http://blog.s21g.com/genki">genki</a>
86
+ (<a href="http://www.s21g.com/">s21g LLC</a>).
87
+ <span class="powered-by">
88
+ Powered by
89
+ Merb-1.0.9
90
+ (Ruby-1.9.1)
91
+ </span>
92
+ </div>
93
+
94
+ </div>
95
+ </div>
96
+
97
+ <script type="text/javascript">
98
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
99
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
100
+ </script>
101
+ <script type="text/javascript">
102
+ try {
103
+ var pageTracker = _gat._getTracker("UA-2733799-11");
104
+ pageTracker._trackPageview();
105
+ } catch(err) {}</script>
106
+
107
+ </body>
108
+ </html>
@@ -0,0 +1,24 @@
1
+ class BlankHtml
2
+ include DataMapper::YS
3
+ uri spec_data_path("blank.html")
4
+ end
5
+
6
+ class BlankStyle
7
+ include DataMapper::YS
8
+ uri spec_data_path("plugins1.html")
9
+ end
10
+
11
+ class TableStyle < BlankStyle
12
+ uri spec_data_path("plugins1.html")
13
+ table "table.main"
14
+ end
15
+
16
+ class TheadStyle < BlankStyle
17
+ uri spec_data_path("plugins1.html")
18
+ thead "table.main"
19
+ end
20
+
21
+ class ThStyle
22
+ include DataMapper::YS
23
+ uri spec_data_path("th.html")
24
+ end
@@ -1,26 +1,6 @@
1
1
  require File.join( File.dirname(__FILE__), "spec_helper" )
2
2
 
3
3
  describe DataMapper::YS do
4
- class ::BlankHtml
5
- include DataMapper::YS
6
- uri spec_data_path("blank.html")
7
- end
8
-
9
- class ::BlankStyle
10
- include DataMapper::YS
11
- uri spec_data_path("plugins1.html")
12
- end
13
-
14
- class ::TableStyle < BlankStyle
15
- uri spec_data_path("plugins1.html")
16
- table "table.main"
17
- end
18
-
19
- class ::TheadStyle < BlankStyle
20
- uri spec_data_path("plugins1.html")
21
- thead "table.main"
22
- end
23
-
24
4
  ######################################################################
25
5
  ### Config
26
6
 
@@ -120,6 +100,5 @@ describe DataMapper::YS do
120
100
  ["Name", "Repos", "Registered by", "Description", ""]
121
101
  end
122
102
  end
123
-
124
103
  end
125
104
  end
@@ -39,4 +39,10 @@ describe DataMapper::YS::Proxy do
39
39
  end
40
40
  end
41
41
 
42
+ ######################################################################
43
+ ### Guess tbody
44
+
45
+ it "should ignore th columns" do
46
+ ThStyle.count.should == 2
47
+ end
42
48
  end
@@ -32,6 +32,8 @@ rescue LoadError
32
32
  end
33
33
 
34
34
  mapping = {
35
+ "http://merbi.st/plugins/" => spec_data_path("plugins1.html"),
36
+
35
37
  # plugin (paginated)
36
38
  "http://merbi.st/plugins/index?page=1" => spec_data_path("plugins1.html"),
37
39
  "http://merbi.st/plugins/index?page=2" => spec_data_path("plugins2.html"),
@@ -0,0 +1,17 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ describe DataMapper::YS::Scraper::Composite do
4
+ describe "UniqPlugin" do
5
+ it "should return 2 pages" do
6
+ UniqPlugin.proxy.pages.size.should == 2
7
+ end
8
+
9
+ describe "#count" do
10
+ it "should return same value as Plugin" do
11
+ UniqPlugin1.count.should == 2
12
+ UniqPlugin2.count.should == 2
13
+ UniqPlugin .count.should == 3
14
+ end
15
+ end
16
+ end
17
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maiha-dm-ys
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.4"
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - maiha
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-03-07 00:00:00 -08:00
12
+ date: 2009-03-08 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -59,6 +59,7 @@ files:
59
59
  - lib/dm-ys
60
60
  - lib/dm-ys/base.rb
61
61
  - lib/dm-ys/config.rb
62
+ - lib/dm-ys/element_property.rb
62
63
  - lib/dm-ys/indexed_property.rb
63
64
  - lib/dm-ys/memory_repository.rb
64
65
  - lib/dm-ys/scraper.rb
@@ -69,15 +70,19 @@ files:
69
70
  - spec/data/uniq2.html
70
71
  - spec/data/sorted2.html
71
72
  - spec/data/sorted1.html
73
+ - spec/data/th.html
72
74
  - spec/data/plugins1.html
73
75
  - spec/data/gem_maintainers.html
74
76
  - spec/data/plugins2.html
75
77
  - spec/data/uniq1.html
76
78
  - spec/data/blank.html
77
79
  - spec/models
80
+ - spec/models/style.rb
78
81
  - spec/models/gem_maintainer.rb
79
82
  - spec/models/plugin.rb
80
83
  - spec/composite_scraper_spec.rb
84
+ - spec/uniq_record_spec.rb
85
+ - spec/attribute_spec.rb
81
86
  - spec/anonymous_spec.rb
82
87
  - spec/scraper_spec.rb
83
88
  - spec/indexed_property_spec.rb