maiha-dm-ys 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -6,18 +6,22 @@ a DataMapper extension that uses html table as its schema and data powerfully li
6
6
  Example
7
7
  =======
8
8
 
9
- class Monster
9
+ class Maintainer
10
10
  include DataMapper::YunkerStar
11
-
12
- uri "http://ds.gkwiki2.com/47.html"
13
- thead "table.style_table thead tr"
14
- tbody "table.style_table tbody tr"
11
+ uri "http://datamapper.org/doku.php?id=gem_maintainers"
15
12
  end
16
-
17
- irb(main):001:0> Monster.count
18
- => 120
19
- irb(main):002:0> Monster.first
20
- => #<Monster id=nil LV="2" 種族="妖精" 名称="ピクシー" HP="30" MP="27" ...
13
+
14
+ irb(main):001:0> Maintainer.count
15
+ => 31
16
+
17
+ irb(main):002:0> Maintainer.first
18
+ => #<Maintainer id=nil _Gem="data_objects" _Maintainer="dbussink" Name="Dirkjan Bussink">
19
+
20
+ irb(main):003:0> Maintainer.names
21
+ => ["_Gem", "_Maintainer", "Name"]
22
+
23
+ irb(main):004:0> Maintainer.labels
24
+ => ["Gem", "Maintainer", "Name"]
21
25
 
22
26
 
23
27
  Copyright (c) 2008 maiha@wota.jp, released under the MIT license
data/Rakefile CHANGED
@@ -33,7 +33,7 @@ AUTHOR = "maiha"
33
33
  EMAIL = "maiha@wota.jp"
34
34
  HOMEPAGE = "http://github.com/maiha/dm-ys"
35
35
  SUMMARY = "a DataMapper extension that uses html table as its schema and data powerfully like YunkerStar"
36
- GEM_VERSION = "0.1"
36
+ GEM_VERSION = "0.2"
37
37
 
38
38
  spec = Gem::Specification.new do |s|
39
39
  # s.rubyforge_project = 'merb'
@@ -21,7 +21,9 @@ module DataMapper
21
21
  end
22
22
 
23
23
  def method_missing(symbol, &block)
24
- @klass.send(:define_method, symbol, &block)
24
+ cached = "__cached__#{symbol}"
25
+ @klass.send(:define_method, cached, &block)
26
+ @klass.class_eval("def #{symbol}; @#{cached} ||= #{cached}; end", "(__CACHED_ACCESSOR__)", 1)
25
27
  end
26
28
  end
27
29
  end
data/lib/dm-ys/proxy.rb CHANGED
@@ -15,6 +15,7 @@ module DataMapper
15
15
  model.class_eval do
16
16
  extend ClassMethods
17
17
  dsl_accessor :uri
18
+ dsl_accessor :table
18
19
  dsl_accessor :tbody
19
20
  dsl_accessor :thead
20
21
  property :id, DataMapper::Types::Serial
@@ -28,13 +29,17 @@ module DataMapper
28
29
 
29
30
  def lazy_load
30
31
  loader = Scraper.new(self)
31
- loader.labels.each do |name|
32
+ loader.names.each do |name|
32
33
  type = String # TODO
33
34
  property name.intern, type
34
35
  end
35
36
  return loader
36
37
  end
37
38
 
39
+ def names
40
+ proxy.names
41
+ end
42
+
38
43
  def labels
39
44
  proxy.labels
40
45
  end
@@ -49,7 +54,7 @@ module DataMapper
49
54
 
50
55
  def all
51
56
  @all ||= proxy.entries.map{|array|
52
- new(Hash[*proxy.labels.zip(array).flatten])
57
+ new(Hash[*proxy.names.zip(array).flatten])
53
58
  }
54
59
  end
55
60
 
data/lib/dm-ys/scraper.rb CHANGED
@@ -11,17 +11,93 @@ module DataMapper
11
11
  class Scraper
12
12
  include CachedAccessor
13
13
 
14
+ class TableNotFound < RuntimeError; end
15
+
16
+ attr_reader :html
17
+
14
18
  def initialize(model)
19
+ raise ArgumentError, "missing model" unless model
20
+ raise ArgumentError, "missing uri" unless model.uri
15
21
  @model = model
16
- @html = NKF.nkf('-w', open(@model.uri).read)
22
+ @html = NKF.nkf('-w', open(model.uri).read)
23
+ @invalid_name_count = 0
24
+ end
25
+
26
+ def guess_table
27
+ [max_table_from("table"), max_table_from("table > tbody")].sort_by(&:first).last.last or
28
+ raise TableNotFound, "set 'table' or 'tbody' manually"
17
29
  end
18
30
 
19
31
  cached_accessor do
20
32
  doc {Hpricot(@html)}
21
- labels {doc.search(@model.thead).first.children.map(&:inner_html)}
22
- entries {doc.search(@model.tbody).map{|tr| tr.children.map(&:inner_html)}}
33
+ table {specified(:table) or guess_table}
34
+ thead {specified(:thead) or table.search("> thead").first or table}
35
+ tbody {specified(:tbody) or table.search("> tbody").first or table}
36
+ names {labels.map{|i| label2name(i)}}
37
+ labels {thead.search("> tr").first.search("> td|th").map{|i|strip_tags(i.inner_html)}}
38
+ entries {tbody.search("> tr").map{|tr| tr.search("> td").map{|i|strip_tags(i.inner_html)}}.delete_if{|i|i.blank?}}
23
39
  end
24
- end
25
40
 
41
+ private
42
+
43
# Finds, among the elements matching +entry+, the one with the most
# direct "tr" children.
#
# Returns a two-element array [row_count, element]. When nothing matches
# the selector the result is [-1, nil]. On a tie the earliest match wins,
# because a later candidate must be strictly larger to replace it.
def max_table_from(entry)
  doc.search(entry).inject([-1, nil]) do |best, candidate|
    rows = candidate.search("> tr").size
    rows > best.first ? [rows, candidate] : best
  end
end
55
+
56
# Resolves a selector configured on the model to a single element.
#
# name - accessor name on the model (the callers in this class pass
#        :table, :thead and :tbody).
#
# Returns the matched element, the first element of a matched set, or nil
# when the model has no selector configured or the search result is of
# another type.
# Raises ArgumentError when the model does not respond to +name+.
def specified(name)
  unless @model.respond_to?(name)
    raise ArgumentError, "invalid selector name: #{name}"
  end

  selector = @model.__send__(name)
  return nil unless selector

  found = doc.search(selector)
  return found       if found.is_a?(Hpricot::Elem)
  return found.first if found.is_a?(Hpricot::Elements)
  nil
end
70
+
71
# Converts a scraped column label into a name usable as a property name.
#
# The label is HTML-unescaped, then "&nbsp;" entities, line breaks,
# punctuation and spaces are removed. After that:
# * a label naming an existing top-level constant is prefixed with "_";
# * a blank label, or one the model already responds to (private methods
#   included), is replaced by a generated "col_N" name;
# * a label starting with a digit is prefixed with "_".
#
# label - raw label text (String); it is not modified.
#
# Returns the sanitized name as a String.
def label2name(label)
  require 'cgi'
  name = CGI.unescapeHTML(label)
  name = name.gsub(/&nbsp;/, '').gsub(/\r?\n/, '')
  # "-" has to lead the set: String#delete reads "x-y" as a character range
  # and raises ArgumentError for a descending one such as "^-[". The doubled
  # backslash is an escaped, literal backslash.
  name = name.delete('-!"#$%&()=~|`{}^[]/<>:; \\\\').delete("'").strip

  # Object.const_defined? raises NameError for strings that are not shaped
  # like a constant name (e.g. "Name.x"), so only constant-shaped labels
  # are looked up.
  if name =~ /\A[A-Z]\w*\z/ && Object.const_defined?(name)
    name = "_#{name}"
  end

  # Blank check without relying on a String#blank? core extension.
  blank = name !~ /[^[:space:]]/
  if blank || @model.respond_to?(name, true)
    new_name_for(name)
  elsif name =~ /\A[0-9]/
    "_#{name}"
  else
    name
  end
end

# Generates the next placeholder column name ("col_1", "col_2", ...).
#
# label - the rejected label; accepted for the caller's convenience and
#         not used in the generated name.
#
# Returns the placeholder name as a String.
def new_name_for(label)
  @invalid_name_count += 1
  "col_#{@invalid_name_count}"
end
96
+
97
# Removes markup tags from an HTML fragment and trims surrounding
# whitespace.
#
# html - the fragment as a String.
#
# Returns a new String without "<...>" sequences.
def strip_tags(html)
  # /m lets "." match line breaks, so a tag wrapped over several lines is
  # removed as well.
  html.gsub(/<.*?>/m, '').strip
end
100
+
101
+ end
26
102
  end
27
103
  end
@@ -0,0 +1,90 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
describe DataMapper::YunkerStar do
  # Fixture models. Each one points at an HTML file resolved through
  # spec_data_path; the subclasses additionally set one selector.
  class BlankHtml
    include DataMapper::YunkerStar
    uri spec_data_path("blank.html")
  end

  class BlankStyle
    include DataMapper::YunkerStar
    uri spec_data_path("ki.html")
  end

  class TableStyle < BlankStyle
    uri spec_data_path("ki.html")
    table "table.main"
  end

  class TheadStyle < BlankStyle
    uri spec_data_path("ki.html")
    thead "table.main"
  end

  it "should provide proxy" do
    BlankStyle.should respond_to(:proxy)
  end

  describe ".proxy" do
    it "should provide guess_table" do
      BlankStyle.proxy.should respond_to(:guess_table)
    end

    describe "#guess_table" do
      it "should return a Hpricot::Elem" do
        BlankStyle.proxy.guess_table.class.should == Hpricot::Elem
      end

      it "should return a right element" do
        BlankStyle.proxy.guess_table["class"].should == "main"
      end

      it "should raise when the html contains no tables" do
        lambda {
          BlankHtml.proxy.guess_table
        }.should raise_error(DataMapper::YunkerStar::Scraper::TableNotFound)
      end
    end

    # Each "should provide ..." example is closed before its describe group
    # so the group is declared at definition time instead of inside an
    # example body.
    it "should provide table" do
      BlankStyle.proxy.should respond_to(:table)
    end

    describe "#table" do
      it "should raise when the html contains no tables" do
        lambda {
          BlankHtml.proxy.table
        }.should raise_error(DataMapper::YunkerStar::Scraper::TableNotFound)
      end

      it "should return specified table" do
        table = TableStyle.proxy.table
        table.class.should == Hpricot::Elem
        table[:class].should == "main"
      end
    end

    it "should provide thead" do
      BlankStyle.proxy.should respond_to(:thead)
    end

    describe "#thead" do
      it "should raise when the html contains no tables" do
        lambda {
          BlankHtml.proxy.thead
        }.should raise_error(DataMapper::YunkerStar::Scraper::TableNotFound)
      end
    end

    it "should provide labels" do
      BlankStyle.proxy.should respond_to(:labels)
    end

    describe "#labels" do
      it "should return th values" do
        BlankStyle.proxy.labels.map(&:strip).should == %w( col1 col2 col3 col4 )
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require "rubygems"
2
+ require "spec"
3
+
4
+ require File.dirname(__FILE__) + "/../lib/dm-ys"
5
+
6
+ def spec_data_path(name)
7
+ Pathname(File.dirname(__FILE__) + "/data/#{name}")
8
+ end
9
+
10
+ def spec_data(name)
11
+ spec_data_path(name).read
12
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maiha-dm-ys
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - maiha
@@ -59,6 +59,11 @@ files:
59
59
  - lib/dm-ys/scraper.rb
60
60
  - lib/dm-ys/cached_accessor.rb
61
61
  - lib/dm-ys/proxy.rb
62
+ - spec/guess_spec.rb
63
+ - spec/data
64
+ - spec/data/ki.html
65
+ - spec/data/blank.html
66
+ - spec/spec_helper.rb
62
67
  has_rdoc: true
63
68
  homepage: http://github.com/maiha/dm-ys
64
69
  post_install_message: