maiha-dm-ys 0.1 → 0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/README CHANGED
@@ -6,18 +6,22 @@ a DataMapper extension that uses html table as its schema and data powerfully li
6
6
  Example
7
7
  =======
8
8
 
9
- class Monster
9
+ class Maintainer
10
10
  include DataMapper::YunkerStar
11
-
12
- uri "http://ds.gkwiki2.com/47.html"
13
- thead "table.style_table thead tr"
14
- tbody "table.style_table tbody tr"
11
+ uri "http://datamapper.org/doku.php?id=gem_maintainers"
15
12
  end
16
-
17
- irb(main):001:0> Monster.count
18
- => 120
19
- irb(main):002:0> Monster.first
20
- => #<Monster id=nil LV="2" 種族="妖精" 名称="ピクシー" HP="30" MP="27" ...
13
+
14
+ irb(main):001:0> Maintainer.count
15
+ => 31
16
+
17
+ irb(main):002:0> Maintainer.first
18
+ => #<Maintainer id=nil _Gem="data_objects" _Maintainer="dbussink" Name="Dirkjan Bussink">
19
+
20
+ irb(main):003:0> Maintainer.names
21
+ => ["_Gem", "_Maintainer", "Name"]
22
+
23
+ irb(main):004:0> Maintainer.labels
24
+ => ["Gem", "Maintainer", "Name"]
21
25
 
22
26
 
23
27
  Copyright (c) 2008 maiha@wota.jp, released under the MIT license
data/Rakefile CHANGED
@@ -33,7 +33,7 @@ AUTHOR = "maiha"
33
33
  EMAIL = "maiha@wota.jp"
34
34
  HOMEPAGE = "http://github.com/maiha/dm-ys"
35
35
  SUMMARY = "a DataMapper extension that uses html table as its schema and data powerfully like YunkerStar"
36
- GEM_VERSION = "0.1"
36
+ GEM_VERSION = "0.2"
37
37
 
38
38
  spec = Gem::Specification.new do |s|
39
39
  # s.rubyforge_project = 'merb'
data/lib/dm-ys/cached_accessor.rb CHANGED
@@ -21,7 +21,9 @@ module DataMapper
21
21
  end
22
22
 
23
23
  def method_missing(symbol, &block)
24
- @klass.send(:define_method, symbol, &block)
24
+ cached = "__cached__#{symbol}"
25
+ @klass.send(:define_method, cached, &block)
26
+ @klass.class_eval("def #{symbol}; @#{cached} ||= #{cached}; end", "(__CACHED_ACCESSOR__)", 1)
25
27
  end
26
28
  end
27
29
  end
data/lib/dm-ys/proxy.rb CHANGED
@@ -15,6 +15,7 @@ module DataMapper
15
15
  model.class_eval do
16
16
  extend ClassMethods
17
17
  dsl_accessor :uri
18
+ dsl_accessor :table
18
19
  dsl_accessor :tbody
19
20
  dsl_accessor :thead
20
21
  property :id, DataMapper::Types::Serial
@@ -28,13 +29,17 @@ module DataMapper
28
29
 
29
30
  def lazy_load
30
31
  loader = Scraper.new(self)
31
- loader.labels.each do |name|
32
+ loader.names.each do |name|
32
33
  type = String # TODO
33
34
  property name.intern, type
34
35
  end
35
36
  return loader
36
37
  end
37
38
 
39
+ def names
40
+ proxy.names
41
+ end
42
+
38
43
  def labels
39
44
  proxy.labels
40
45
  end
@@ -49,7 +54,7 @@ module DataMapper
49
54
 
50
55
  def all
51
56
  @all ||= proxy.entries.map{|array|
52
- new(Hash[*proxy.labels.zip(array).flatten])
57
+ new(Hash[*proxy.names.zip(array).flatten])
53
58
  }
54
59
  end
55
60
 
data/lib/dm-ys/scraper.rb CHANGED
@@ -11,17 +11,93 @@ module DataMapper
11
11
  class Scraper
12
12
  include CachedAccessor
13
13
 
14
+ class TableNotFound < RuntimeError; end
15
+
16
+ attr_reader :html
17
+
14
18
  def initialize(model)
19
+ raise ArgumentError, "missing model" unless model
20
+ raise ArgumentError, "missing uri" unless model.uri
15
21
  @model = model
16
- @html = NKF.nkf('-w', open(@model.uri).read)
22
+ @html = NKF.nkf('-w', open(model.uri).read)
23
+ @invalid_name_count = 0
24
+ end
25
+
26
+ def guess_table
27
+ [max_table_from("table"), max_table_from("table > tbody")].sort_by(&:first).last.last or
28
+ raise TableNotFound, "set 'table' or 'tbody' manually"
17
29
  end
18
30
 
19
31
  cached_accessor do
20
32
  doc {Hpricot(@html)}
21
- labels {doc.search(@model.thead).first.children.map(&:inner_html)}
22
- entries {doc.search(@model.tbody).map{|tr| tr.children.map(&:inner_html)}}
33
+ table {specified(:table) or guess_table}
34
+ thead {specified(:thead) or table.search("> thead").first or table}
35
+ tbody {specified(:tbody) or table.search("> tbody").first or table}
36
+ names {labels.map{|i| label2name(i)}}
37
+ labels {thead.search("> tr").first.search("> td|th").map{|i|strip_tags(i.inner_html)}}
38
+ entries {tbody.search("> tr").map{|tr| tr.search("> td").map{|i|strip_tags(i.inner_html)}}.delete_if{|i|i.blank?}}
23
39
  end
24
- end
25
40
 
41
+ private
42
+
43
+ def max_table_from(entry)
44
+ table = nil
45
+ count = -1
46
+ doc.search(entry).each do |t|
47
+ size = t.search("> tr").size
48
+ if size > count
49
+ count = size
50
+ table = t
51
+ end
52
+ end
53
+ [count, table]
54
+ end
55
+
56
+ def specified(name)
57
+ @model.respond_to?(name) or raise ArgumentError, "invalid selector name: #{name}"
58
+ css = @model.__send__(name) or return nil
59
+
60
+ element = doc.search(css)
61
+ case element
62
+ when Hpricot::Elem
63
+ return element
64
+ when Hpricot::Elements
65
+ return element.first
66
+ else
67
+ return nil
68
+ end
69
+ end
70
+
71
+ def label2name(label)
72
+ require 'cgi'
73
+ label = CGI.unescapeHTML(label)
74
+ label.gsub!(/&nbsp;/, '')
75
+ label.gsub!(/\r?\n/, '')
76
+ label.delete!('!"#$%&()=~|`{}^-[]/<>:; \\')
77
+ label.delete!("'")
78
+ label.strip!
79
+
80
+ if /^([A-Z])/ === label and Object.const_defined?(label)
81
+ label = "_#{label}"
82
+ end
83
+ if label.blank? or @model.respond_to?(label, true)
84
+ new_name_for(label)
85
+ elsif /^[0-9]/ === label
86
+ "_#{label}"
87
+ else
88
+ label
89
+ end
90
+ end
91
+
92
+ def new_name_for(label)
93
+ @invalid_name_count += 1
94
+ "col_#{@invalid_name_count}"
95
+ end
96
+
97
+ def strip_tags(html)
98
+ html.gsub(/<.*?>/, '').strip
99
+ end
100
+
101
+ end
26
102
  end
27
103
  end
@@ -0,0 +1,90 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ describe DataMapper::YunkerStar do
4
+ class BlankHtml
5
+ include DataMapper::YunkerStar
6
+ uri spec_data_path("blank.html")
7
+ end
8
+
9
+ class BlankStyle
10
+ include DataMapper::YunkerStar
11
+ uri spec_data_path("ki.html")
12
+ end
13
+
14
+ class TableStyle < BlankStyle
15
+ uri spec_data_path("ki.html")
16
+ table "table.main"
17
+ end
18
+
19
+ class TheadStyle < BlankStyle
20
+ uri spec_data_path("ki.html")
21
+ thead "table.main"
22
+ end
23
+
24
+ it "should provide proxy" do
25
+ BlankStyle.should respond_to(:proxy)
26
+ end
27
+
28
+ describe ".proxy" do
29
+ it "should provide guess_table" do
30
+ BlankStyle.proxy.should respond_to(:guess_table)
31
+ end
32
+
33
+ describe "#guess_table" do
34
+ it "should return a Hpricot::Elem" do
35
+ BlankStyle.proxy.guess_table.class.should == Hpricot::Elem
36
+ end
37
+
38
+ it "should return a right element" do
39
+ BlankStyle.proxy.guess_table["class"].should == "main"
40
+ end
41
+
42
+ it "should raise when the html contains no tables" do
43
+ lambda {
44
+ BlankHtml.proxy.guess_table
45
+ }.should raise_error(DataMapper::YunkerStar::Scraper::TableNotFound)
46
+ end
47
+ end
48
+
49
+ it "should provide table" do
50
+ BlankStyle.proxy.should respond_to(:table)
51
+
52
+ describe "#table" do
53
+ it "should raise when the html contains no tables" do
54
+ lambda {
55
+ BlankHtml.proxy.table
56
+ }.should raise_error(DataMapper::YunkerStar::Scraper::TableNotFound)
57
+ end
58
+
59
+ it "should return specified table" do
60
+ table = TableStyle.proxy.table
61
+ table.class.should == Hpricot::Elem
62
+ table[:class].should == "main"
63
+ end
64
+ end
65
+ end
66
+
67
+ it "should provide thead" do
68
+ BlankStyle.proxy.should respond_to(:thead)
69
+
70
+ describe "#thead" do
71
+ it "should raise when the html contains no tables" do
72
+ lambda {
73
+ BlankHtml.proxy.thead
74
+ }.should raise_error(DataMapper::YunkerStar::Scraper::TableNotFound)
75
+ end
76
+ end
77
+ end
78
+
79
+ it "should provide labels" do
80
+ BlankStyle.proxy.should respond_to(:labels)
81
+
82
+ describe "#labels" do
83
+ it "should return th values" do
84
+ BlankStyle.proxy.labels.map(&:strip).should == %w( col1 col2 col3 col4 )
85
+ end
86
+ end
87
+ end
88
+
89
+ end
90
+ end
@@ -0,0 +1,12 @@
1
+ require "rubygems"
2
+ require "spec"
3
+
4
+ require File.dirname(__FILE__) + "/../lib/dm-ys"
5
+
6
+ def spec_data_path(name)
7
+ Pathname(File.dirname(__FILE__) + "/data/#{name}")
8
+ end
9
+
10
+ def spec_data(name)
11
+ spec_data_path(name).read
12
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maiha-dm-ys
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - maiha
@@ -59,6 +59,11 @@ files:
59
59
  - lib/dm-ys/scraper.rb
60
60
  - lib/dm-ys/cached_accessor.rb
61
61
  - lib/dm-ys/proxy.rb
62
+ - spec/guess_spec.rb
63
+ - spec/data
64
+ - spec/data/ki.html
65
+ - spec/data/blank.html
66
+ - spec/spec_helper.rb
62
67
  has_rdoc: true
63
68
  homepage: http://github.com/maiha/dm-ys
64
69
  post_install_message: