maiha-dm-ys 0.4 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -64,8 +64,31 @@ Append "*" to uri if you want pagination mode.
64
64
  => 36
65
65
 
66
66
 
67
+ Raw Element
68
+ ===========
69
+
70
+ The "element_for" method returns the raw scraped element for a column; these are currently Hpricot elements.
71
+
72
+ >> Plugin.names #=> ["Name", "Repos", "Registeredby", "Description", "col_1"]
73
+ >> record = Plugin.first #=> #<Plugin id=1 Name="eventmachine-0.12.5" Repos=...>
74
+ >> record.Name #=> "eventmachine-0.12.5"
75
+ >> record.element_for("Name") #=> {elem td {elem a href"/plugins/36" "eventmachine-0.12.5" a} td}
76
+ >> record.element_for("Name").class #=> Hpricot::Elem
77
+
78
+ "link_for" is syntactic sugar for extracting the href attribute of the first link inside that element.
79
+
80
+ >> record.link_for("Name") #=> "http://merbi.st/plugins/36"
81
+ >> record.link_for("Registeredby") #=> "http://merbi.st/users/1"
82
+
83
+ Furthermore, the :only_path option controls whether "link_for" returns a fully qualified URL (the default) or only the path.
84
+
85
+ >> Plugin.ys[:only_path] = true
86
+ >> record.link_for("Name") #=> "/plugins/36"
87
+ >> record.link_for("Registeredby") #=> "/users/1"
88
+
89
+
67
90
  TODO
68
91
  ====
69
- * store link information for each columns
92
+ * Feel free to request what you want! :)
70
93
 
71
94
  Copyright (c) 2008 maiha@wota.jp, released under the MIT license
data/Rakefile CHANGED
@@ -33,7 +33,7 @@ AUTHOR = "maiha"
33
33
  EMAIL = "maiha@wota.jp"
34
34
  HOMEPAGE = "http://github.com/maiha/dm-ys"
35
35
  SUMMARY = "a DataMapper extension that uses html table as its schema and data powerfully like YunkerStar"
36
- GEM_VERSION = "0.4"
36
+ GEM_VERSION = "0.4.1"
37
37
 
38
38
  spec = Gem::Specification.new do |s|
39
39
  # s.rubyforge_project = 'merb'
@@ -2,7 +2,7 @@ module DataMapper
2
2
  module YS
3
3
  class Config
4
4
  def self.default
5
- {:max_pages=>100, :uniq=>true}
5
+ {:max_pages=>100, :uniq=>true, :only_path=>false}
6
6
  end
7
7
 
8
8
  def initialize(options = nil)
@@ -25,6 +25,10 @@ module DataMapper
25
25
  def uniq_entry?
26
26
  self[:uniq] == true or self[:uniq] == :entry
27
27
  end
28
+
29
+ def only_path?
30
+ !!self[:only_path]
31
+ end
28
32
  end
29
33
  end
30
34
  end
@@ -0,0 +1,47 @@
1
+ module DataMapper
2
+ module YS
3
+
4
+ # ==== Example
5
+ #
6
+ # class Foo
7
+ # include DataMapper::YS
8
+ # uri ...
9
+ #
10
+ # # <tr><th>name</th>...
11
+ # # <tr><td><a href="/plugins/36">dm-ys</a></td>...
12
+ #
13
+ # foo = Foo.first
14
+ # foo.link_for(:name) # => "/plugins/36"
15
+
16
+ module ElementProperty
17
+ def link_for(key)
18
+ links_for(key).first
19
+ end
20
+
21
+ def links_for(key)
22
+ key = normalized_property_for(key)
23
+ (@links[key.to_s] || []).map do |url|
24
+ if self.class.ys.only_path?
25
+ url
26
+ else
27
+ (self.class.proxy.base_uri + url).to_s
28
+ end
29
+ end
30
+ end
31
+
32
+ def element_for(key)
33
+ key = normalized_property_for(key)
34
+ @elements[key.to_s]
35
+ end
36
+
37
+ def links=(value)
38
+ @links = value
39
+ end
40
+
41
+ def elements=(value)
42
+ @elements = value
43
+ end
44
+ end
45
+
46
+ end
47
+ end
@@ -68,6 +68,10 @@ module DataMapper
68
68
  @uri || @model.uri.to_s.chomp('*')
69
69
  end
70
70
 
71
+ def base_uri
72
+ URI.parse(uri.split('?').first)
73
+ end
74
+
71
75
  def register_properties!
72
76
  names.each do |name|
73
77
  type = String # TODO
@@ -95,7 +99,7 @@ module DataMapper
95
99
  end
96
100
 
97
101
  def pagination_links
98
- base = URI.parse(uri.split('?').first)
102
+ base = base_uri
99
103
  urls = (doc / "a").map{|i| i[:href] =~ /^http/ ? i[:href] : (base+i[:href]).to_s}.uniq
100
104
  urls.select{|url| /^#{Regexp.escape(base.to_s)}/ === url}
101
105
  end
@@ -122,12 +126,17 @@ module DataMapper
122
126
  labels {thead.search("> tr").first.search("> td|th").map{|i|strip_tags(i.inner_html)}}
123
127
  records {
124
128
  tbody.search("> tr").map do |tr|
125
- elems = tr.search("> td")
126
- values = elems.map{|i|strip_tags(i.inner_html)}
129
+ elems = tr.search("> td")
130
+ next if elems.blank? # ignored because this should be TH columns
131
+
132
+ values = elems.map{|i|strip_tags(i.inner_html)}
133
+ elements = Hash[*names.zip(elems).flatten]
134
+
127
135
  record = @model.new(Hash[*names.zip(values).flatten])
128
- record.elements = Hash[*names.zip(elems).flatten]
136
+ record.elements = elements
137
+ record.links = names.inject({}){|h,name| h[name] = links_for(elements[name]); h}
129
138
  record
130
- end
139
+ end.compact
131
140
  }
132
141
  end
133
142
 
@@ -184,6 +193,17 @@ module DataMapper
184
193
  def strip_tags(html)
185
194
  html.gsub(/<.*?>/, '').strip
186
195
  end
196
+
197
+ def links_for(element)
198
+ case element
199
+ when Hpricot::Elem
200
+ return Array(element.search("a")).map{|i| i[:href]}
201
+ when Hpricot::Elements
202
+ return element.map{|e| links_for(e)}.flatten
203
+ else
204
+ return []
205
+ end
206
+ end
187
207
  end
188
208
 
189
209
  ######################################################################
@@ -0,0 +1,42 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ describe DataMapper::YS, "(a record)" do
4
+ class OnlyPath
5
+ include DataMapper::YS
6
+ uri "http://merbi.st/plugins/"
7
+ ys[:only_path] = true
8
+ end
9
+
10
+ before(:each) do
11
+ @record = Plugin.first
12
+ end
13
+
14
+ it "should provide #link_for" do
15
+ @record.should respond_to(:link_for)
16
+ end
17
+
18
+ describe "#link_for" do
19
+ it "should return first link if its element has href attributes" do
20
+ @record.link_for("Name").should == "http://merbi.st/plugins/36"
21
+ end
22
+
23
+ it "should return nil if its element has no href attributes" do
24
+ @record.link_for("Description").should == nil
25
+ end
26
+
27
+ it "should return only path link when :only_path is true" do
28
+ OnlyPath.first.link_for("Name").should == "/plugins/36"
29
+ end
30
+ end
31
+
32
+ it "should provide #element_for" do
33
+ @record.should respond_to(:element_for)
34
+ end
35
+
36
+ describe "#element_for" do
37
+ it "should return first link if its element has href attributes" do
38
+ @record.element_for("Name").to_s.should == "<td><a href=\"/plugins/36\">eventmachine-0.12.5</a></td>"
39
+ end
40
+ end
41
+
42
+ end
@@ -63,6 +63,15 @@ describe DataMapper::YS::Config do
63
63
  config = DataMapper::YS::Config.new(:uniq=>false)
64
64
  config.uniq_entry?.should == false
65
65
  end
66
+ end
67
+
68
+ it "should provide #only_path?" do
69
+ @config.should respond_to(:only_path?)
70
+ end
66
71
 
72
+ describe "[:only_path]" do
73
+ it "should has false as default value" do
74
+ @config[:only_path].should == false
75
+ end
67
76
  end
68
77
  end
@@ -0,0 +1,108 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-us" lang="en-us">
3
+ <head>
4
+ <title>Merbist Plugins</title>
5
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
6
+ <meta name="verify-v1"
7
+ content="QqJ1Kmvs51kF+1Sn+7JUqkXTXbnmLVKzFctoGLRDLE8=" />
8
+ <link rel="stylesheet" href="/stylesheets/master.css" type="text/css"
9
+ media="screen" charset="utf-8" />
10
+ <script src="/javascripts/jquery.js" type="text/javascript"></script>
11
+ <link rel="alternate" type="application/rss+xml" title="Atom" href="/plugins.atom"></link>
12
+
13
+ </head>
14
+ <body>
15
+ <div id="base">
16
+ <div id="header">
17
+ <div id="navi">
18
+ <a href="/">Top</a>
19
+ <a href="/plugins">Plugins</a>
20
+ <a href="/talks">Talks</a>
21
+ <a href="/sites">Sites</a>
22
+ <a href="/users">Users</a>
23
+ </div>
24
+ <div id="menu">
25
+ <a href="/login">Login</a>
26
+ <a href="/users/new">Sing up</a>
27
+ </div>
28
+ <div class="clear"><!----></div>
29
+
30
+ </div>
31
+ <div id="body">
32
+ <div class="command">
33
+ <a href="/plugins">Index</a>
34
+ </div>
35
+
36
+
37
+ <h1>Plugin List</h1>
38
+
39
+ <table class="plugin-list" cellspacing="1" border="0">
40
+ <tr>
41
+ <th>Name</th>
42
+ <th>Repos</th>
43
+ <th>Registered by</th>
44
+ <th>Description</th>
45
+ <th></th>
46
+ </tr>
47
+ <tr class="even">
48
+ <td><a href="/plugins/36">eventmachine-0.12.5</a></td>
49
+ <td>&dagger;</td>
50
+ <td><a href="/users/1">genki</a></td>
51
+ <td>EventMachine</td>
52
+ <td>
53
+ </td>
54
+ </tr>
55
+ <tr class="">
56
+ <td><a href="/plugins/35">dm-last-0.0.1</a></td>
57
+ <td>&dagger;</td>
58
+ <td><a href="/users/1">genki</a></td>
59
+ <td>Model.last</td>
60
+ <td>
61
+ </td>
62
+ </tr>
63
+ </table>
64
+
65
+ <div class="pagination"><span class="prev disabled">&laquo; Prev</span>
66
+ <span class="current disabled">1</span>
67
+ <span class="disabled"><a href="/plugins/uniq?page=2">2</a></span>
68
+ <a class="next" rel="next" href="/plugins/uniq?page=2">Next &raquo;</a></div>
69
+
70
+ <div class="footnote">
71
+ &dagger; You can install these gems by
72
+ <code>
73
+ gem install gem-name --source http://merbi.st
74
+ </code>
75
+ </div>
76
+
77
+
78
+ </div>
79
+ <div id="footer">
80
+ <div id="footer">
81
+ 2008
82
+ <a href="http://wota.jp/ac/">maiha</a>,
83
+ <a href="http://d.jong.gr.jp/shachi">shachi</a>
84
+ and
85
+ <a href="http://blog.s21g.com/genki">genki</a>
86
+ (<a href="http://www.s21g.com/">s21g LLC</a>).
87
+ <span class="powered-by">
88
+ Powered by
89
+ Merb-1.0.9
90
+ (Ruby-1.9.1)
91
+ </span>
92
+ </div>
93
+
94
+ </div>
95
+ </div>
96
+
97
+ <script type="text/javascript">
98
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
99
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
100
+ </script>
101
+ <script type="text/javascript">
102
+ try {
103
+ var pageTracker = _gat._getTracker("UA-2733799-11");
104
+ pageTracker._trackPageview();
105
+ } catch(err) {}</script>
106
+
107
+ </body>
108
+ </html>
@@ -0,0 +1,24 @@
1
+ class BlankHtml
2
+ include DataMapper::YS
3
+ uri spec_data_path("blank.html")
4
+ end
5
+
6
+ class BlankStyle
7
+ include DataMapper::YS
8
+ uri spec_data_path("plugins1.html")
9
+ end
10
+
11
+ class TableStyle < BlankStyle
12
+ uri spec_data_path("plugins1.html")
13
+ table "table.main"
14
+ end
15
+
16
+ class TheadStyle < BlankStyle
17
+ uri spec_data_path("plugins1.html")
18
+ thead "table.main"
19
+ end
20
+
21
+ class ThStyle
22
+ include DataMapper::YS
23
+ uri spec_data_path("th.html")
24
+ end
@@ -1,26 +1,6 @@
1
1
  require File.join( File.dirname(__FILE__), "spec_helper" )
2
2
 
3
3
  describe DataMapper::YS do
4
- class ::BlankHtml
5
- include DataMapper::YS
6
- uri spec_data_path("blank.html")
7
- end
8
-
9
- class ::BlankStyle
10
- include DataMapper::YS
11
- uri spec_data_path("plugins1.html")
12
- end
13
-
14
- class ::TableStyle < BlankStyle
15
- uri spec_data_path("plugins1.html")
16
- table "table.main"
17
- end
18
-
19
- class ::TheadStyle < BlankStyle
20
- uri spec_data_path("plugins1.html")
21
- thead "table.main"
22
- end
23
-
24
4
  ######################################################################
25
5
  ### Config
26
6
 
@@ -120,6 +100,5 @@ describe DataMapper::YS do
120
100
  ["Name", "Repos", "Registered by", "Description", ""]
121
101
  end
122
102
  end
123
-
124
103
  end
125
104
  end
@@ -39,4 +39,10 @@ describe DataMapper::YS::Proxy do
39
39
  end
40
40
  end
41
41
 
42
+ ######################################################################
43
+ ### Guess tbody
44
+
45
+ it "should ignore th columns" do
46
+ ThStyle.count.should == 2
47
+ end
42
48
  end
@@ -32,6 +32,8 @@ rescue LoadError
32
32
  end
33
33
 
34
34
  mapping = {
35
+ "http://merbi.st/plugins/" => spec_data_path("plugins1.html"),
36
+
35
37
  # plugin (paginated)
36
38
  "http://merbi.st/plugins/index?page=1" => spec_data_path("plugins1.html"),
37
39
  "http://merbi.st/plugins/index?page=2" => spec_data_path("plugins2.html"),
@@ -0,0 +1,17 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ describe DataMapper::YS::Scraper::Composite do
4
+ describe "UniqPlugin" do
5
+ it "should return 2 pages" do
6
+ UniqPlugin.proxy.pages.size.should == 2
7
+ end
8
+
9
+ describe "#count" do
10
+ it "should return same value as Plugin" do
11
+ UniqPlugin1.count.should == 2
12
+ UniqPlugin2.count.should == 2
13
+ UniqPlugin .count.should == 3
14
+ end
15
+ end
16
+ end
17
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maiha-dm-ys
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.4"
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - maiha
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-03-07 00:00:00 -08:00
12
+ date: 2009-03-08 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -59,6 +59,7 @@ files:
59
59
  - lib/dm-ys
60
60
  - lib/dm-ys/base.rb
61
61
  - lib/dm-ys/config.rb
62
+ - lib/dm-ys/element_property.rb
62
63
  - lib/dm-ys/indexed_property.rb
63
64
  - lib/dm-ys/memory_repository.rb
64
65
  - lib/dm-ys/scraper.rb
@@ -69,15 +70,19 @@ files:
69
70
  - spec/data/uniq2.html
70
71
  - spec/data/sorted2.html
71
72
  - spec/data/sorted1.html
73
+ - spec/data/th.html
72
74
  - spec/data/plugins1.html
73
75
  - spec/data/gem_maintainers.html
74
76
  - spec/data/plugins2.html
75
77
  - spec/data/uniq1.html
76
78
  - spec/data/blank.html
77
79
  - spec/models
80
+ - spec/models/style.rb
78
81
  - spec/models/gem_maintainer.rb
79
82
  - spec/models/plugin.rb
80
83
  - spec/composite_scraper_spec.rb
84
+ - spec/uniq_record_spec.rb
85
+ - spec/attribute_spec.rb
81
86
  - spec/anonymous_spec.rb
82
87
  - spec/scraper_spec.rb
83
88
  - spec/indexed_property_spec.rb