cut 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cut.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Ryan Closner
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # Cut
2
+
3
+ A DSL for Scraping Websites
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'cut'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cut
18
+
19
+ ## Usage
20
+
21
+ Search Google:
22
+
23
+ ```ruby
24
+ class SearchResult
25
+
26
+ include Cut
27
+
28
+ url "http://google.com/search?q={{keywords}}"
29
+
30
+ selector "li.g"
31
+
32
+ map :title, String, to: "h3.r"
33
+
34
+ end
35
+ ```
36
+
37
+ Return Results:
38
+
39
+ ```ruby
40
+ SearchResult.all(keywords: "war and peace")
41
+ #=> [#<SearchResult:0x007fd18be96588 @title="War and Peace - Wikipedia, the free encyclopedia">, #<SearchResult:0x007fd18b97c098 @title="War and Peace (1956) - IMDb">, #<SearchResult:0x007fd18b986188 @title="War and Peace (Vintage Classics): Leo Tolstoy, Richard Pevear ...">, #<SearchResult:0x007fd18b874038 @title="War and Peace by graf Leo Tolstoy - Free Ebook - Project Gutenberg">, #<SearchResult:0x007fd18b8b46b0 @title="SparkNotes: War and Peace">, #<SearchResult:0x007fd18bc070d8 @title="War and Peace by Leo Tolstoy - Reviews, Discussion, Bookclubs, Lists">, #<SearchResult:0x007fd18bf7c8d0 @title="War and Peace - The Literature Network">, #<SearchResult:0x007fd18bf7a0f8 @title="War and Peace - graf Leo Tolstoy - Google Books">, #<SearchResult:0x007fd18bfc2d58 @title="Images for war and peace">, #<SearchResult:0x007fd189397be8 @title="War and Peace - Planet PDF">, #<SearchResult:0x007fd1893c6268 @title="War and Peace - Shmoop">, #<SearchResult:0x007fd1895fe0f8 @title="News for war and peace">]
42
+ ```
43
+
44
+
45
+ ## Contributing
46
+
47
+ 1. Fork it
48
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
49
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
50
+ 4. Push to the branch (`git push origin my-new-feature`)
51
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/cut.gemspec ADDED
@@ -0,0 +1,35 @@
1
# coding: utf-8
# Gem specification for cut. Puts lib/ on the load path first so the version
# constant can be read without the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'cut/version'

Gem::Specification.new do |spec|
  spec.name          = "cut"
  spec.version       = Cut::VERSION
  spec.authors       = ["Ryan Closner"]
  spec.email         = ["ryan@ryanclosner.com"]
  spec.description   = %q{A Scraping DSL}
  spec.summary       = %q{A Scraping DSL}
  spec.homepage      = "http://github.com/rclosner/cut"
  spec.license       = "MIT"

  # Libraries needed at runtime, keyed by gem name.
  runtime_dependencies = {
    'httparty' => '~> 0.11.0',
    'nokogiri' => '~> 1.6.0',
    'virtus'   => '~> 0.5.5'
  }

  # Block parameters are deliberately not called `lib`, so they do not shadow
  # the load-path local defined at the top of this file.
  runtime_dependencies.each do |gem_name, requirement|
    spec.add_runtime_dependency(gem_name, requirement)
  end

  # Libraries needed only while developing the gem.
  development_dependencies = {
    'bundler' => '~> 1.3',
    'rake'    => '~> 10.1.0'
  }

  development_dependencies.each do |gem_name, requirement|
    spec.add_development_dependency(gem_name, requirement)
  end

  # Split on a literal newline rather than the mutable global $/, so the file
  # list does not depend on process-wide state.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,46 @@
1
require "cgi"

module Cut
  # Class-level DSL that Cut extends onto every class including it.
  #
  # The URL template, the row selector and the attribute mappings are stored
  # in instance variables of the extending class, so each scraper class keeps
  # its own configuration instead of sharing one set through the module.
  module ClassMethods

    # Stores the URL template for this scraper class. `{{name}}` placeholders
    # are substituted by .all.
    #
    # @param new_url [String] URL template
    # @return [String] the stored template
    def url(new_url)
      @url = new_url
    end

    # Stores the CSS selector that matches one result row.
    #
    # @param new_selector [String] CSS selector
    # @return [String] the stored selector
    def selector(new_selector)
      @selector = new_selector
    end

    # Declares an attribute and the selector it is read from.
    # Arguments are (name, type, options); see #add_mapping.
    def map(*args)
      add_mapping(*args)
    end

    # Fetches the configured URL and builds one instance per matched row.
    #
    # @param options [Hash] placeholder name => value; each value is converted
    #   with #to_s and URL-escaped before substitution
    # @return [Array] instances of the receiving class
    def all(options = {})
      # Work on a copy so the stored template keeps its placeholders.
      endpoint = scraper_setting(:@url).dup
      options.each do |key, value|
        endpoint.gsub!("{{#{key}}}", CGI.escape(value.to_s))
      end

      parse(Client.get(endpoint))
    end

    private

    # Mappings declared directly on this class.
    def mappings
      @mappings ||= []
    end

    # Mappings declared on this class and on any scraper ancestors, ancestors
    # first, so subclasses of a scraper keep working.
    def all_mappings
      parent = respond_to?(:superclass) ? superclass : nil
      inherited = parent.respond_to?(:all_mappings, true) ? parent.send(:all_mappings) : []
      inherited + mappings
    end

    # Reads a configuration value from this class, falling back to the nearest
    # scraper ancestor that defines it. Returns nil when none does.
    def scraper_setting(ivar)
      return instance_variable_get(ivar) if instance_variable_defined?(ivar)

      parent = respond_to?(:superclass) ? superclass : nil
      parent.send(:scraper_setting, ivar) if parent.respond_to?(:scraper_setting, true)
    end

    # Records the mapping and declares the attribute on the class.
    # `options` defaults to an empty hash so Mapping can apply its own
    # default selector when no :to is given.
    def add_mapping(name, type, options = {})
      mappings << Mapping.new(name, options)
      send(:attribute, name, type)
    end

    # Turns a fetched document into instances: one per node matching the row
    # selector, with each mapped attribute assigned from its sub-selector.
    def parse(response)
      row_selector = scraper_setting(:@selector)
      response.css(row_selector).map do |node|
        new.tap do |instance|
          all_mappings.each do |mapping|
            instance.send("#{mapping.name}=", node.at_css(mapping.selector).value)
          end
        end
      end
    end

  end
end
data/lib/cut/client.rb ADDED
@@ -0,0 +1,21 @@
1
module Cut
  # Fetches a single endpoint over HTTP and wraps the result in a Response.
  class Client
    class << self
      # Shortcut: build a client for +endpoint+ and perform the request.
      def get(endpoint)
        new(endpoint).get
      end
    end

    # @param endpoint [String] fully substituted URL to request
    def initialize(endpoint)
      @endpoint = endpoint
    end

    # Performs the GET request and returns it wrapped in a Response.
    def get
      raw = HTTParty.get(endpoint)
      Response.new(raw)
    end

    private

    attr_reader :endpoint
  end
end
@@ -0,0 +1,4 @@
1
module Cut
  # Instance-level mixin for classes that include Cut.
  # It currently defines no methods.
  module InstanceMethods; end
end
@@ -0,0 +1,16 @@
1
module Cut
  # Associates an attribute name with the CSS selector its value is read from.
  class Mapping
    attr_reader :name

    # @param name [Symbol, String] attribute name
    # @param options [Hash] :to gives the CSS selector; optional
    def initialize(name, options = {})
      @name, @selector = name, options[:to]
    end

    # The selector given via :to, or ".<name>" when none was supplied.
    # The fallback is computed on first use and then remembered.
    def selector
      @selector = ".#{name}" unless @selector
      @selector
    end
  end
end
data/lib/cut/node.rb ADDED
@@ -0,0 +1,25 @@
1
module Cut
  # Wrapper around a parsed document node exposing only the queries the
  # scraper needs. The wrapped node may be nil (a selector that matched
  # nothing); every query then returns an empty result instead of raising.
  class Node

    # @param node [#css, #at_css, #text, nil] underlying node, or nil
    def initialize(node)
      @node = node
    end

    # All descendants matching +selector+, each wrapped in a Node.
    #
    # @return [Array<Node>] empty when nothing is wrapped
    def css(selector)
      return [] unless node

      node.css(selector).map { |match| Node.new(match) }
    end

    # First descendant matching +selector+, wrapped in a Node. The result is
    # always a Node; it wraps nil when there is no match, so callers can
    # chain #value safely.
    #
    # @return [Node]
    def at_css(selector)
      Node.new(node && node.at_css(selector))
    end

    # Text content of the wrapped node.
    #
    # @return [String, nil] nil when nothing is wrapped
    def value
      node && node.text
    end

    private

    attr_reader :node

  end
end
@@ -0,0 +1,21 @@
1
module Cut
  # A Node whose document comes from an HTTP response. The body is parsed
  # with Nokogiri on first access and the parsed document is reused.
  class Response < Node

    # @param response [#body] raw HTTP response
    def initialize(response)
      @response = response
    end

    attr_reader :response

    # Raw body of the wrapped response.
    def body
      response.body
    end

    # Parsed document; overrides the reader inherited from Node.
    def node
      @node = Nokogiri::HTML(body) unless @node
      @node
    end

    private :response, :body, :node

  end
end
@@ -0,0 +1,3 @@
1
module Cut
  # Gem version, read by cut.gemspec. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = "0.0.1".freeze
end
data/lib/cut.rb ADDED
@@ -0,0 +1,21 @@
1
+ require "httparty"
2
+ require "nokogiri"
3
+ require "virtus"
4
+
5
+ require "cut/version"
6
+ require "cut/class_methods"
7
+ require "cut/instance_methods"
8
+ require "cut/mapping"
9
+ require "cut/client"
10
+ require "cut/node"
11
+ require "cut/response"
12
+
13
module Cut
  class << self
    # Hook run when a class does `include Cut`: extends it with the
    # class-level DSL, then mixes in InstanceMethods and Virtus, in that order.
    def included(base)
      base.extend(ClassMethods)
      [InstanceMethods, Virtus].each { |mixin| base.send(:include, mixin) }
    end
  end
end
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cut
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ryan Closner
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-12 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: httparty
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.11.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.11.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: nokogiri
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.6.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.6.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: virtus
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 0.5.5
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.5.5
62
+ - !ruby/object:Gem::Dependency
63
+ name: bundler
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '1.3'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '1.3'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rake
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 10.1.0
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 10.1.0
94
+ description: A Scraping DSL
95
+ email:
96
+ - ryan@ryanclosner.com
97
+ executables: []
98
+ extensions: []
99
+ extra_rdoc_files: []
100
+ files:
101
+ - .gitignore
102
+ - Gemfile
103
+ - LICENSE.txt
104
+ - README.md
105
+ - Rakefile
106
+ - cut.gemspec
107
+ - lib/cut.rb
108
+ - lib/cut/class_methods.rb
109
+ - lib/cut/client.rb
110
+ - lib/cut/instance_methods.rb
111
+ - lib/cut/mapping.rb
112
+ - lib/cut/node.rb
113
+ - lib/cut/response.rb
114
+ - lib/cut/version.rb
115
+ homepage: http://github.com/rclosner/cut
116
+ licenses:
117
+ - MIT
118
+ post_install_message:
119
+ rdoc_options: []
120
+ require_paths:
121
+ - lib
122
+ required_ruby_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 1.8.23
137
+ signing_key:
138
+ specification_version: 3
139
+ summary: A Scraping DSL
140
+ test_files: []
141
+ has_rdoc: