ruby-query 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Francis Chong
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,102 @@
1
+ # Ruby-Query
2
+
3
+ jQuery for the command line and Ruby. It makes extracting simple data fun and easy.
4
+
5
+ It uses Nokogiri for CSS extraction. Inspired by the Node.js tool [Query](https://github.com/visionmedia/query).
6
+
7
+ ## Installation
8
+
9
+ $ gem install ruby-query
10
+
11
+
12
+ ## Ruby Examples
13
+
14
+ Extract an attribute from a webpage:
15
+
16
+ require 'open-uri'
17
+ require 'ruby_query'
18
+
19
+ RubyQuery::Query.query(open("http://twitter.com"), "a#logo img", "attr", "alt")
20
+ => "Twitter"
21
+
22
+ Extract a form input value:
23
+
24
+ require 'ruby_query'
25
+
26
+ RubyQuery::Query.query('<input type="text" value="tj@vision-media.ca"/>', "input", "val")
27
+ => "tj@vision-media.ca"
28
+
29
+ ## Command Line Examples
30
+
31
+ Twitter logo alt text:
32
+
33
+ $ curl http://twitter.com | rquery 'a#logo img' attr alt
34
+ Twitter
35
+
36
+ Alternately, since the output is simply more html, we can achieve this same result via pipes:
37
+
38
+ $ curl http://twitter.com | rquery 'a#logo' | rquery img attr alt
39
+ Twitter
40
+
41
+ Check if a class is present:
42
+
43
+ $ curl http://twitter.com | rquery .article '#timeline' hasClass statuses
44
+ true
45
+
46
+ $ echo $?
47
+ 0
48
+
49
+ Exit status for bools:
50
+
51
+ $ echo '<div class="foo bar"></div>' | rquery div hasClass baz
52
+ false
53
+
54
+ $ echo $?
55
+ 1
56
+
57
+ Grab width or height attributes:
58
+
59
+ $ echo '<div class="user" width="300"></div>' | rquery div.user width
60
+ 300
61
+
62
+ Output element text:
63
+
64
+ $ echo '<p>very <em>slick</em></p>' | rquery p text
65
+ very slick
66
+
67
+ Values:
68
+
69
+ $ echo '<input type="text" value="tj@vision-media.ca"/>' | rquery input val
70
+ tj@vision-media.ca
71
+
72
+ Get second li's text:
73
+
74
+ $ echo $list | rquery ul li get 1 text
75
+ two
76
+
77
+ Get third li's text using `next`:
78
+
79
+ $ echo $list | rquery ul li get 1 next text
80
+ three
81
+
82
+ Get length:
83
+
84
+ $ echo '<ul><li></li><li></li></ul>' | rquery li length
85
+ 2
86
+
87
+
88
+ ## Contributing to RQuery
89
+
90
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
91
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
92
+ * Fork the project
93
+ * Start a feature/bugfix branch
94
+ * Commit and push until you are happy with your contribution
95
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
96
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
97
+
98
+ ## Copyright
99
+
100
+ Copyright (c) 2011 Francis Chong. See LICENSE.txt for
101
+ further details.
102
+
data/bin/rquery ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Command-line front end for RubyQuery.
#
# Reads an HTML document from standard input, runs the query given on the
# command line (first argument is the CSS selector, the remaining arguments
# are commands and their parameters) and prints the result.
require 'ruby_query'

html = STDIN.read
result = RubyQuery::Query.query(html, *ARGV)
puts result

# Boolean results (e.g. from hasClass) double as the process exit status so
# the tool can be used in shell conditionals: true => 0, false => 1.
exit(result ? 0 : 1) if result == true || result == false
5
+
data/lib/ruby_query.rb ADDED
@@ -0,0 +1,138 @@
1
+ require 'nokogiri'
2
+
3
module RubyQuery
  # Raised when a query command cannot be applied to the current context.
  #
  # Carries the context object the command was applied to, the command name
  # and type, the command parameter (if any) and a human-readable message.
  class QueryError < StandardError
    attr_accessor :context, :command_name, :command_type, :param, :message

    # context:: object the command was being applied to
    # name::    command name (Symbol)
    # type::    command type ('method', 'attribute', 'traverse' or 'proc')
    # param::   command parameter, or nil
    # msg::     description of the failure
    def initialize(context, name, type, param, msg)
      super(msg)
      @context = context
      @command_name = name
      @command_type = type
      @param = param
      @message = msg
    end

    def to_s
      "#{command_type}:#{command_name} param=#{param}, message=#{message}"
    end
  end

  # jQuery-style extraction over an HTML document parsed with Nokogiri.
  class Query
    # Supported commands, keyed by name.
    #
    # :type selects how handle_command applies the command:
    # 'method'::    call the method of that name on the context
    # 'attribute':: read the attribute of that name from the (first) element
    # 'traverse'::  move to another node; :at indexes into the context
    # 'proc'::      call the stored :proc with (context, param, query)
    #
    # :arity => 1 means the command consumes the next query token as its
    # parameter.
    COMMANDS = {
      :to_html => {:type => 'method'},
      :inner_html => {:type => 'method'},
      :text => {:type => 'method'},
      :size => {:type => 'method'},
      :length => {:type => 'method'}, # target of the :len alias

      :width => {:type => 'attribute'},
      :height => {:type => 'attribute'},
      :value => {:type => 'attribute'},
      :class => {:type => 'attribute'},

      :first => {:type => 'traverse'},
      :last => {:type => 'traverse'},
      :parent => {:type => 'traverse'},
      :next_sibling => {:type => 'traverse'},
      :previous_sibling => {:type => 'traverse'},
      :at => {:type => 'traverse', :arity => 1},

      :attr => {
        :type => 'proc',
        :arity => 1,
        :proc => Proc.new { |context, param, _query|
          # Deliberately best effort: any failure to read the attribute
          # (nil context, no match, ...) yields an empty string.
          begin
            context.attr(param).to_s
          rescue StandardError
            ""
          end
        }
      },
      :hasClass => {
        :type => 'proc',
        :arity => 1,
        :proc => Proc.new { |context, param, _query|
          # For a node set the first node decides, as with attributes.
          node = case context
                 when Nokogiri::XML::Element then context
                 when Nokogiri::XML::NodeSet then context.first
                 else
                   raise QueryError.new(context, :hasClass, 'proc', param,
                                        "Cannot get hasClass from #{context.class}")
                 end
          # No node or no class attribute means the class is absent.
          node ? node["class"].to_s.split(" ").include?(param) : false
        }
      }
    }

    # Alternative command names mapped to the canonical command name.
    ALIAS = {
      :html => :to_html,
      :len => :length,
      :count => :size,
      :get => :at,
      :"has-class" => :hasClass,
      :val => :value
    }

    # Runs a query against an HTML document.
    #
    # html::  HTML source (anything Nokogiri::HTML accepts)
    # query:: CSS selector followed by zero or more commands, each optionally
    #         followed by its parameter
    #
    # Returns the HTML of the resulting element/node set, or the raw command
    # result (String, Integer, boolean, nil) when the last command does not
    # produce a node.
    def self.query(html, *query)
      doc = Nokogiri::HTML(html)
      ctx = doc.search(query.shift)

      while (method = query.shift)
        key = method.to_sym
        name = ALIAS[key] || key
        # NOTE(review): names that are not registered fall back to a plain
        # method call on the context (this is how e.g. "next" works). That
        # means user-supplied names reach `send`; restrict to COMMANDS if the
        # input is untrusted.
        command = COMMANDS[name] || COMMANDS[:to_html]
        param = command[:arity] ? query.shift : nil
        ctx = handle_command(ctx, name, command[:type], param, query)
      end

      nokogiri_node?(ctx) ? ctx.to_html : ctx
    end

    # Applies a single command to +context+ and returns the new context.
    # Internal helper of query; kept callable for backward compatibility.
    #
    # Raises QueryError when the command cannot be applied to the context or
    # when +type+ is not a known command type.
    def self.handle_command(context, name, type, param=nil, query=nil)
      case type
      when 'method'
        return nil if context.nil?
        unless nokogiri_node?(context)
          raise QueryError.new(context, name, type, param, "Cannot apply #{name} to #{context.class}")
        end
        invoke(context, name, param)

      when 'attribute'
        case context
        when Nokogiri::XML::Element
          context[name]
        when Nokogiri::XML::NodeSet
          # An empty node set has no first element; report "no value".
          node = context.first
          node && node[name]
        else
          raise QueryError.new(context, name, type, param, "Cannot get attr #{name} from #{context.class}")
        end

      when 'traverse'
        if name == :at
          if param.nil?
            raise QueryError.new(context, name, type, param, "missing at parameter")
          end
          context[param.to_i]
        else
          unless nokogiri_node?(context)
            raise QueryError.new(context, name, type, param, "Cannot traverse #{context.class}")
          end
          invoke(context, name, param)
        end

      when 'proc'
        COMMANDS[name][:proc].call(context, param, query)

      else
        raise QueryError.new(context, name, type, param, "Unexpected type: #{type}")
      end
    end

    # True when +obj+ is a Nokogiri element or node set.
    def self.nokogiri_node?(obj)
      obj.is_a?(Nokogiri::XML::Element) || obj.is_a?(Nokogiri::XML::NodeSet)
    end

    # Calls +name+ on +context+, passing +param+ only when one was given.
    def self.invoke(context, name, param)
      param ? context.send(name, param) : context.send(name)
    end

    private_class_method :nokogiri_node?, :invoke
  end
end
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+
3
# Behavioural examples for RubyQuery::Query.query, mirroring the README.
describe RubyQuery do
  # Small page with a linked logo image and a two-item list.
  HTML_SIMPLE = "<html><body><a href='test' id='logo'><img src='1.gif' alt='Test'/></a><ul class='list'><li>point a</li><li>point b</li></ul></body></html>"
  # Bare three-item list used for indexing/length examples.
  HTML_LIST = "<ul><li>Apple</li><li>Orange</li><li>Cat</li></ul>"

  it "extract using class" do
    RubyQuery::Query.query(HTML_SIMPLE, ".list li", "first", "text").should == "point a"
  end

  it "extract using id" do
    RubyQuery::Query.query(HTML_SIMPLE, "a#logo img", "attr", "alt").should == "Test"
  end

  it "should return empty if a match is not found" do
    RubyQuery::Query.query(HTML_SIMPLE, "a#logo #qk").should be_empty
    RubyQuery::Query.query(HTML_SIMPLE, "a#logo #qk", "attr", "alt").should be_empty
  end

  it "nested extraction" do
    # The output of one query is HTML and can be fed into another query.
    logo = RubyQuery::Query.query(HTML_SIMPLE, "a#logo")
    RubyQuery::Query.query(logo, "img", "attr", "alt").should == "Test"
  end

  it "check if a class is present" do
    # Exercise hasClass itself (and its has-class alias), both outcomes.
    RubyQuery::Query.query(HTML_SIMPLE, "ul", "hasClass", "list").should be_true
    RubyQuery::Query.query(HTML_SIMPLE, "ul", "has-class", "list").should be_true
    RubyQuery::Query.query(HTML_SIMPLE, "ul", "hasClass", "missing").should be_false
  end

  it "grab width or height attributes" do
    RubyQuery::Query.query('<div class="user" width="300"></div>', "div.user", "width").should == "300"
    RubyQuery::Query.query('<div class="user" width="300" height="150"></div>', "div.user", "height").should == "150"
  end

  it "output element text and inner html" do
    RubyQuery::Query.query('<p>very <em>slick</em></p>', "p", "text").should == "very slick"
    RubyQuery::Query.query('<p>very <em>slick</em></p>', "p", "inner_html").should == "very <em>slick</em>"
  end

  it "get value" do
    RubyQuery::Query.query('<input type="text" value="tj@vision-media.ca"/>', "input", "val").should == "tj@vision-media.ca"
    RubyQuery::Query.query('<input type="text" value="tj@vision-media.ca"/>', "input", "value").should == "tj@vision-media.ca"
  end

  it "get second li's text" do
    RubyQuery::Query.query(HTML_LIST, "ul li", "get", "1", "text").should == "Orange"
    # Indexing past the end yields nil rather than an error.
    RubyQuery::Query.query(HTML_LIST, "ul li", "get", "10", "text").should be_nil
  end

  it "get third li's text using next" do
    RubyQuery::Query.query(HTML_LIST, "ul li", "get", "1", "next", "text").should == "Cat"
  end

  it "get length" do
    RubyQuery::Query.query(HTML_LIST, "li", "len").should == 3
    RubyQuery::Query.query(HTML_LIST, "li", "length").should == 3
  end

  it "should handle html or text" do
    RubyQuery::Query.query(HTML_LIST, "li", "first", "text").should == "Apple"
    RubyQuery::Query.query(HTML_LIST, "li", "first", "html").should == "<li>Apple</li>\n"
    RubyQuery::Query.query(HTML_LIST, "li", "html").should == "<li>Apple</li>\n<li>Orange</li>\n<li>Cat</li>"
  end

end
@@ -0,0 +1,13 @@
1
+ require 'rubygems'
2
+ require 'json'
3
+
4
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+
7
+ require 'ruby_query'
8
+ require 'rspec'
9
+ require 'rspec/autorun'
10
+
11
+ RSpec.configure do |config|
12
+
13
+ end
metadata ADDED
@@ -0,0 +1,161 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-query
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Francis Chong
14
+ autorequire: ruby-query
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-12 00:00:00 +08:00
19
+ default_executable: rquery
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :runtime
32
+ name: nokogiri
33
+ prerelease: false
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ hash: 3
42
+ segments:
43
+ - 0
44
+ version: "0"
45
+ type: :development
46
+ name: rake
47
+ prerelease: false
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ requirement: &id003 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ hash: 3
56
+ segments:
57
+ - 0
58
+ version: "0"
59
+ type: :development
60
+ name: json
61
+ prerelease: false
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ requirement: &id004 !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ hash: 3
70
+ segments:
71
+ - 0
72
+ version: "0"
73
+ type: :development
74
+ name: jeweler
75
+ prerelease: false
76
+ version_requirements: *id004
77
+ - !ruby/object:Gem::Dependency
78
+ requirement: &id005 !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ">"
82
+ - !ruby/object:Gem::Version
83
+ hash: 7
84
+ segments:
85
+ - 1
86
+ - 4
87
+ - 0
88
+ version: 1.4.0
89
+ type: :runtime
90
+ name: nokogiri
91
+ prerelease: false
92
+ version_requirements: *id005
93
+ - !ruby/object:Gem::Dependency
94
+ requirement: &id006 !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ">"
98
+ - !ruby/object:Gem::Version
99
+ hash: 25
100
+ segments:
101
+ - 1
102
+ - 2
103
+ - 3
104
+ version: 1.2.3
105
+ type: :development
106
+ name: rspec
107
+ prerelease: false
108
+ version_requirements: *id006
109
+ description: RQuery is a simple jQuery style method to extract HTML
110
+ email: francis@ignition.hk
111
+ executables:
112
+ - rquery
113
+ extensions: []
114
+
115
+ extra_rdoc_files:
116
+ - LICENSE.txt
117
+ - README.md
118
+ files:
119
+ - bin/rquery
120
+ - lib/ruby_query.rb
121
+ - LICENSE.txt
122
+ - README.md
123
+ - spec/rquery/rquery_spec.rb
124
+ - spec/spec_helper.rb
125
+ has_rdoc: true
126
+ homepage: http://github.com/siuying/rquery
127
+ licenses:
128
+ - MIT
129
+ post_install_message:
130
+ rdoc_options: []
131
+
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ none: false
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ hash: 3
140
+ segments:
141
+ - 0
142
+ version: "0"
143
+ required_rubygems_version: !ruby/object:Gem::Requirement
144
+ none: false
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ hash: 3
149
+ segments:
150
+ - 0
151
+ version: "0"
152
+ requirements: []
153
+
154
+ rubyforge_project:
155
+ rubygems_version: 1.3.7
156
+ signing_key:
157
+ specification_version: 3
158
+ summary: Simple jQuery style method to extract HTML
159
+ test_files:
160
+ - spec/rquery/rquery_spec.rb
161
+ - spec/spec_helper.rb