maiha-css_parser 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,19 @@
1
+ css_parser
2
+ ==========
3
+
4
+ An Hpricot helper that makes HTML scraping easy: define a parser class whose fields are declared with CSS selectors.
5
+
6
+
7
+ Example
8
+ =======
9
+
10
+ class UserParser < CssParser
11
+ css :name, "div#contents span.name"
12
+ css :age , "div#contents span.age"
13
+ end
14
+
15
+ parser = UserParser.file('user.html')
16
+ User.new parser.attributes
17
+
18
+
19
+ Copyright (c) 2008 maiha@wota.jp, released under the MIT license
data/Rakefile ADDED
@@ -0,0 +1,52 @@
# Build, install and test tasks for the css_parser gem.
require 'rubygems'
require 'rake/gempackagetask'
require 'spec/rake/spectask'

GEM_NAME = "css_parser"
AUTHOR = "maiha"
EMAIL = "maiha@wota.jp"
HOMEPAGE = "http://github.com/maiha/css_parser"
SUMMARY = "hpricot helper that scrapes html easily by parser class defined css selector"
GEM_VERSION = "0.1"

# Path of the gem archive built by the packaging task ("pkg" is the
# default package directory of Rake::GemPackageTask).
GEM_FILE = File.join("pkg", "#{GEM_NAME}-#{GEM_VERSION}.gem")

spec = Gem::Specification.new do |s|
  # s.rubyforge_project = 'merb'
  s.name = GEM_NAME
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE
  s.add_dependency('hpricot', '>= 0.1')
  s.add_dependency('dsl_accessor', '>= 0.1')
  s.require_path = 'lib'
  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec,app,public,stubs}/**/*")
end

# Defines the :package, :gem, :repackage and :clobber_package tasks.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

# The original tasks called Merb::RakeHelper, but Merb is never loaded by
# this Rakefile, so they failed with NameError. Plain `gem` commands do the
# same job without the extra dependency.
desc "Install the gem"
task :install => :gem do
  sh "gem", "install", "--local", GEM_FILE
end

desc "Uninstall the gem"
task :uninstall do
  sh "gem", "uninstall", GEM_NAME, "--version", GEM_VERSION
end

desc "Create a gemspec file"
task :gemspec do
  File.open("#{GEM_NAME}.gemspec", "w") do |file|
    file.puts spec.to_ruby
  end
end

# The default task depends on :spec, so that task has to exist; requiring
# spec/rake/spectask only loads the task class and does not define a task.
desc "Run spec examples"
Spec::Rake::SpecTask.new(:spec) do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
end

desc 'Default: run spec examples'
task :default => 'spec'
data/lib/css_parser.rb ADDED
@@ -0,0 +1,79 @@
require 'rubygems'
require 'nkf'
require 'pathname'
require 'dsl_accessor'
require 'hpricot'
4
+
# Base class for small HTML scrapers built on Hpricot.
#
# Subclasses declare named fields with +css+; every declaration becomes an
# instance method that returns the inner HTML of the first element matching
# the selector, or nil when nothing matches.
#
#   class UserParser < CssParser
#     css :name, "div#contents span.name"
#   end
#   UserParser.new(html).name
class CssParser
  # Class-level accessor supplied by the dsl_accessor gem; the proc yields
  # the default value, an empty Hash.
  dsl_accessor :stored_css, proc{{}}

  ######################################################################
  ### Exceptions

  # Raised by +css+ when the requested key would shadow an existing method.
  class ReservedCss < StandardError; end

  ######################################################################
  ### InstanceMethods

  # html     - markup to parse; converted with #to_s, so nil becomes "".
  # filename - optional origin of the markup; only stored.
  def initialize(html = nil, filename = nil)
    @html     = html.to_s
    @filename = filename
  end

  # Returns the Hpricot document for the markup, built once on first use.
  def parser
    @parser ||= Hpricot(@html)
  end

  # Returns a Hash that maps each key to the value of the same-named method.
  #
  # keys - names to collect; defaults to every key declared with +css+ on
  #        this object's class.
  def attributes(keys = nil)
    keys ||= self.class.my_stored_css.keys
    keys.inject({}) do |collected, key|
      collected[key] = send(key)
      collected
    end
  end

  ######################################################################
  ### Class Methods

  # Builds a parser from the file at +file+. The content is passed through
  # NKF with the -w flag (UTF-8 output) before parsing.
  def self.file(file)
    html = NKF.nkf('-w', Pathname(file).read)
    new(html, file)
  end

  # Declares a scraped field.
  #
  # key     - name of the generated instance method (String or Symbol).
  # pattern - CSS selector handed to Hpricot's search.
  #
  # Raises ReservedCss when +key+ collides with an existing instance method
  # that was not itself created by an earlier +css+ call on this class.
  def self.css(key, pattern)
    key = key.to_s.intern
    guard_from_overridden(key)
    define_css(key, pattern)
  end

  ######################################################################
  ### Internal helpers
  #
  # The original source placed a bare `private` here, which has no effect on
  # `def self.` methods. The helpers therefore were, and remain, publicly
  # callable; my_stored_css must be, because instances call it through
  # self.class.

  # Anonymous module that holds the generated readers. It is included into
  # the class so that a hand-written method of the same name can override a
  # reader and still reach it with +super+.
  def self.css_module
    return @css_module if @css_module

    mod = Module.new
    include mod
    @css_module = mod
  end

  # stored_css object for this class: a per-class copy, so declarations do
  # not mutate the object returned by stored_css.
  def self.my_stored_css
    @my_stored_css ||= begin
      stored_css.dup
    rescue StandardError
      # Fall back to the object itself when it cannot be duplicated.
      stored_css
    end
  end

  # Registers +pattern+ under +key+ and generates the reader unless a method
  # of that name is already available.
  def self.define_css(key, pattern)
    # method_defined? works with Symbols and Strings alike, unlike
    # instance_methods.include?(key.to_s), which never matches on Ruby 1.9+
    # where instance_methods returns Symbols.
    unless method_defined?(key)
      css_module.module_eval do
        define_method(key) do
          # Looked up at call time so a later redefinition of the same key
          # takes effect without regenerating the method.
          selector = self.class.my_stored_css[key]
          element  = parser.search(selector).first
          element ? element.inner_html : nil
        end
      end
    end

    my_stored_css[key] = pattern
  end

  # Raises ReservedCss if defining +key+ would hide an existing method.
  # Keys already declared on this class may be redeclared freely.
  def self.guard_from_overridden(key)
    return if my_stored_css.has_key?(key)

    if method_defined?(key)
      # Plain interpolation of the class replaces String#classify, which is
      # an ActiveSupport extension this file never loads.
      raise ReservedCss, "#{key} is reserved for #{self}##{key}"
    end
    if %w( attributes parser ).include?(key.to_s)
      raise ReservedCss, "#{key} is reserved for CssParser module"
    end
  end
end
@@ -0,0 +1,96 @@
require File.dirname(__FILE__) + "/spec_helper"

# Behavioural specification for CssParser, written in RSpec 1.x `should`
# syntax. Several examples reopen CssParser or Foo, so declarations made in
# one example remain visible to the examples that run after it.
describe CssParser do
  it "should provide .file" do
    CssParser.should respond_to(:file)
  end

  describe ".file" do
    it "should return a CssParser" do
      # Any readable file serves as input; this spec file is always there.
      loaded = CssParser.file(__FILE__)
      loaded.class.should == CssParser
    end
  end

  it "should provide .css" do
    CssParser.should respond_to(:css)
  end

  describe ".css" do
    it "should create an instance-level accessor to the argument" do
      on_class    = lambda { CssParser.foo2 }
      on_instance = lambda { CssParser.new.foo2 }

      # Before the declaration neither the class nor an instance responds.
      on_class.should raise_error(NoMethodError)
      on_instance.should raise_error(NoMethodError)

      CssParser.css :foo2, "pattern"

      # Afterwards only instances gain the reader.
      on_class.should raise_error(NoMethodError)
      on_instance.should_not raise_error(NoMethodError)
    end

    describe " should raise" do
      it "when an existing instance method is specified" do
        declaring_send = lambda { CssParser.css :send, "pattern" }
        declaring_send.should raise_error(CssParser::ReservedCss)
      end

      it "when reserved methods are specified" do
        [:attributes, :parser].each do |reserved|
          lambda {
            CssParser.css reserved, "pattern"
          }.should raise_error(CssParser::ReservedCss)
        end
      end

    end

    it "should parse" do
      class CssParser
        css :foo, "div"
      end

      scraper = CssParser.new('<div>maiha</div>')
      scraper.foo.should == "maiha"
    end

    it "should respect css selector" do
      class Foo < CssParser
        css :name, "div.name"
      end

      scraper = Foo.new('<div>xxx</div><div class=name>maiha</div>')
      scraper.name.should == "maiha"
    end

    it "should define instance method as module" do
      class CssParser
        css :foo, "div"

        # The generated reader must be reachable through super.
        def foo
          "[#{super}]"
        end
      end

      scraper = CssParser.new('<div>a</div>')
      scraper.foo.should == "[a]"
    end
  end

  it "should provide #parser" do
    CssParser.new.should respond_to(:parser)
  end

  it "should provide #attributes" do
    CssParser.new.should respond_to(:attributes)
  end

  describe "#attributes" do
    it "should return composed hash" do
      class Foo < CssParser
        css :name, "#name"
        css :age , "#age"
      end

      # Empty markup matches nothing, so every declared key maps to nil.
      expected = {:age=>nil, :name=>nil}
      Foo.new('').attributes.should == expected
    end
  end

end
@@ -0,0 +1,6 @@
# Shared setup for the spec suite.
require "rubygems"
require "hpricot"
require "spec"

# CssParser.file relies on NKF and Pathname, so make sure both are loaded.
require "nkf"
require "pathname"

# Load the library directly from lib/. The previous target, ../init, is not
# part of the packaged gem's file list, so the specs could not be loaded
# from an installed copy.
require File.join(File.dirname(__FILE__), "..", "lib", "css_parser")
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: maiha-css_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.1"
5
+ platform: ruby
6
+ authors:
7
+ - maiha
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-23 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0.1"
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: dsl_accessor
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: "0.1"
32
+ version:
33
+ description: hpricot helper that scrapes html easily by parser class defined css selector
34
+ email: maiha@wota.jp
35
+ executables: []
36
+
37
+ extensions: []
38
+
39
+ extra_rdoc_files:
40
+ - README
41
+ - LICENSE
42
+ - TODO
43
+ files:
44
+ - LICENSE
45
+ - README
46
+ - Rakefile
47
+ - TODO
48
+ - lib/css_parser.rb
49
+ - spec/spec_helper.rb
50
+ - spec/css_parser_spec.rb
51
+ has_rdoc: true
52
+ homepage: http://github.com/maiha/css_parser
53
+ post_install_message:
54
+ rdoc_options: []
55
+
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.2.0
74
+ signing_key:
75
+ specification_version: 2
76
+ summary: hpricot helper that scrapes html easily by parser class defined css selector
77
+ test_files: []
78
+