cut 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cut.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Ryan Closner
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # Cut
2
+
3
+ A DSL for Scraping Websites
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'cut'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cut
18
+
19
+ ## Usage
20
+
21
+ Search Google:
22
+
23
+ ```ruby
24
+ class SearchResult
25
+
26
+ include Cut
27
+
28
+ url "http://google.com/search?q={{keywords}}"
29
+
30
+ selector "li.g"
31
+
32
+ map :title, String, to: "h3.r"
33
+
34
+ end
35
+ ```
36
+
37
+ Return Results:
38
+
39
+ ```ruby
40
+ SearchResult.all(keywords: "war and peace")
41
+ #=> [#<SearchResult:0x007fd18be96588 @title="War and Peace - Wikipedia, the free encyclopedia">, #<SearchResult:0x007fd18b97c098 @title="War and Peace (1956) - IMDb">, #<SearchResult:0x007fd18b986188 @title="War and Peace (Vintage Classics): Leo Tolstoy, Richard Pevear ...">, #<SearchResult:0x007fd18b874038 @title="War and Peace by graf Leo Tolstoy - Free Ebook - Project Gutenberg">, #<SearchResult:0x007fd18b8b46b0 @title="SparkNotes: War and Peace">, #<SearchResult:0x007fd18bc070d8 @title="War and Peace by Leo Tolstoy - Reviews, Discussion, Bookclubs, Lists">, #<SearchResult:0x007fd18bf7c8d0 @title="War and Peace - The Literature Network">, #<SearchResult:0x007fd18bf7a0f8 @title="War and Peace - graf Leo Tolstoy - Google Books">, #<SearchResult:0x007fd18bfc2d58 @title="Images for war and peace">, #<SearchResult:0x007fd189397be8 @title="War and Peace - Planet PDF">, #<SearchResult:0x007fd1893c6268 @title="War and Peace - Shmoop">, #<SearchResult:0x007fd1895fe0f8 @title="News for war and peace">]
42
+ ```
43
+
44
+
45
+ ## Contributing
46
+
47
+ 1. Fork it
48
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
49
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
50
+ 4. Push to the branch (`git push origin my-new-feature`)
51
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/cut.gemspec ADDED
@@ -0,0 +1,35 @@
1
# coding: utf-8
# Gem specification for the cut gem.
# Puts the gem's own lib/ directory on the load path so the version constant
# can be read without the gem being installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'cut/version'

Gem::Specification.new do |spec|
  spec.name          = "cut"
  spec.version       = Cut::VERSION
  spec.authors       = ["Ryan Closner"]
  spec.email         = ["ryan@ryanclosner.com"]
  spec.description   = %q{A Scraping DSL}
  spec.summary       = %q{A Scraping DSL}
  spec.homepage      = "http://github.com/rclosner/cut"
  spec.license       = "MIT"

  # Libraries needed when the gem is used.
  spec.add_runtime_dependency('httparty', '~> 0.11.0')
  spec.add_runtime_dependency('nokogiri', '~> 1.6.0')
  spec.add_runtime_dependency('virtus', '~> 0.5.5')

  # Libraries needed only while developing the gem.
  spec.add_development_dependency('bundler', '~> 1.3')
  spec.add_development_dependency('rake', '~> 10.1.0')

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,46 @@
1
require "cgi"

module Cut
  # Class-level scraping DSL added to a class with `extend`.
  #
  # Configuration (URL template, row selector, attribute mappings) is kept in
  # instance variables of the extending class itself, so every scraper class
  # has its own independent settings. Subclasses do not inherit the settings
  # of their parent.
  module ClassMethods

    # Sets the URL template to fetch. Placeholders written as `{{key}}` are
    # filled in by #all.
    #
    # @param new_url [String] URL template
    # @return [String] the template that was stored
    def url(new_url)
      @cut_url = new_url
    end

    # Sets the CSS selector that identifies one result in the fetched page.
    #
    # @param new_selector [String] CSS selector
    # @return [String] the selector that was stored
    def selector(new_selector)
      @cut_selector = new_selector
    end

    # Declares an attribute and the CSS selector it is read from.
    # Arguments are forwarded as (name, type, options); see #add_mapping.
    def map(*args)
      add_mapping(*args)
    end

    # Fetches the configured URL and builds one instance per matched result.
    #
    # @param options [Hash] placeholder values; each `{{key}}` in the URL
    #   template is replaced by the CGI-escaped string form of its value
    # @return [Array] one instance of the extending class per matched node
    # @raise [RuntimeError] if no URL template has been configured
    def all(options = {})
      raise "#{self} has no url configured" if @cut_url.nil?

      endpoint = @cut_url.dup
      options.each do |key, value|
        escaped = CGI.escape(value.to_s)
        # Block form so the replacement text is inserted literally.
        endpoint.gsub!("{{#{key}}}") { escaped }
      end

      parse(Client.get(endpoint))
    end

    private

    # Mappings declared on this class so far.
    def mappings
      @cut_mappings ||= []
    end

    # Records the mapping and declares the attribute on the class through
    # the `attribute` class method (expected to be supplied by the extending
    # class).
    #
    # @param name [Symbol] attribute name
    # @param type [Class] attribute type passed to `attribute`
    # @param options [Hash] mapping options passed to Mapping.new; may be
    #   omitted, in which case Mapping decides the selector
    def add_mapping(name, type, options = {})
      mappings << Mapping.new(name, options)
      attribute(name, type)
    end

    # Builds one instance per node matching the row selector and assigns
    # every mapped attribute from the node found by the mapping's selector.
    def parse(response)
      response.css(@cut_selector).map do |node|
        new.tap do |instance|
          mappings.each do |mapping|
            instance.send("#{mapping.name}=", node.at_css(mapping.selector).value)
          end
        end
      end
    end

  end
end
data/lib/cut/client.rb ADDED
@@ -0,0 +1,21 @@
1
module Cut
  # Performs the HTTP request for an endpoint and wraps the result.
  class Client

    class << self
      # Convenience entry point: builds a client for +endpoint+ and runs it.
      #
      # @param endpoint [String] URL to fetch
      # @return [Cut::Response] wrapped HTTP response
      def get(endpoint)
        client = new(endpoint)
        client.get
      end
    end

    # @param endpoint [String] URL this client will fetch
    def initialize(endpoint)
      @endpoint = endpoint
    end

    # Requests the endpoint with HTTParty and wraps the raw result.
    #
    # @return [Cut::Response]
    def get
      raw = HTTParty.get(endpoint)
      Response.new(raw)
    end

    private

    # URL given at construction time.
    def endpoint
      @endpoint
    end

  end
end
@@ -0,0 +1,4 @@
1
module Cut
  # Instance-level mixin for classes that include Cut.
  # Defines no methods of its own.
  module InstanceMethods
  end
end
@@ -0,0 +1,16 @@
1
module Cut
  # Associates an attribute name with the CSS selector its value is read from.
  class Mapping

    # @param name [Symbol, String] attribute name
    # @param options [Hash] :to holds an explicit CSS selector, if any
    def initialize(name, options = {})
      @name = name
      @selector = options[:to]
    end

    # @return [Symbol, String] the attribute name given at construction
    def name
      @name
    end

    # CSS selector for this attribute. When none was supplied through :to,
    # a class selector built from the attribute name (".name") is used and
    # remembered.
    #
    # @return [String]
    def selector
      @selector = ".#{name}" unless @selector
      @selector
    end

  end
end
data/lib/cut/node.rb ADDED
@@ -0,0 +1,25 @@
1
module Cut
  # Thin wrapper around a parsed document node (anything responding to
  # `css`, `at_css` and `text`).
  #
  # A Node may wrap +nil+, which is what #at_css produces when nothing
  # matches. Such an empty node answers every query without raising:
  # #value is nil, #css is empty and #at_css is another empty node.
  class Node

    # @param node [Object, nil] the underlying node, or nil for "no match"
    def initialize(node)
      @node = node
    end

    # All descendants matching +selector+, each wrapped in a Node.
    #
    # @param selector [String] CSS selector
    # @return [Array<Cut::Node>] empty when this node wraps nothing
    def css(selector)
      return [] if node.nil?

      node.css(selector).map { |match| Node.new(match) }
    end

    # First descendant matching +selector+, wrapped in a Node.
    #
    # @param selector [String] CSS selector
    # @return [Cut::Node] wraps nil when there is no match
    def at_css(selector)
      Node.new(node && node.at_css(selector))
    end

    # Text of the wrapped node.
    #
    # @return [String, nil] nil when this node wraps nothing
    def value
      node && node.text
    end

    private

    attr_reader :node

  end
end
@@ -0,0 +1,21 @@
1
module Cut
  # A Node backed by an HTTP response: the response body is parsed as HTML
  # the first time the underlying node is needed.
  class Response < Node

    # @param response [#body] raw HTTP response
    def initialize(response)
      @response = response
    end

    private

    # Raw response given at construction time.
    def response
      @response
    end

    # Body text of the raw response.
    def body
      response.body
    end

    # Parsed HTML document for the body; parsed once and then reused.
    def node
      return @node if @node

      @node = Nokogiri::HTML(body)
    end

  end
end
@@ -0,0 +1,3 @@
1
module Cut
  # Version string of the cut gem. Frozen so the constant cannot be mutated
  # in place.
  VERSION = "0.0.1".freeze
end
data/lib/cut.rb ADDED
@@ -0,0 +1,21 @@
1
+ require "httparty"
2
+ require "nokogiri"
3
+ require "virtus"
4
+
5
+ require "cut/version"
6
+ require "cut/class_methods"
7
+ require "cut/instance_methods"
8
+ require "cut/mapping"
9
+ require "cut/client"
10
+ require "cut/node"
11
+ require "cut/response"
12
+
13
module Cut

  # Hook run when Cut is included into a class. Extends the class with
  # ClassMethods, then includes InstanceMethods and Virtus, in that order.
  #
  # @param base [Class] the including class
  def self.included(base)
    base.extend(ClassMethods)
    [InstanceMethods, Virtus].each { |mixin| base.send(:include, mixin) }
  end

end
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cut
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ryan Closner
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-12 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: httparty
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.11.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.11.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: nokogiri
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.6.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.6.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: virtus
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 0.5.5
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.5.5
62
+ - !ruby/object:Gem::Dependency
63
+ name: bundler
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '1.3'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '1.3'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rake
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 10.1.0
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 10.1.0
94
+ description: A Scraping DSL
95
+ email:
96
+ - ryan@ryanclosner.com
97
+ executables: []
98
+ extensions: []
99
+ extra_rdoc_files: []
100
+ files:
101
+ - .gitignore
102
+ - Gemfile
103
+ - LICENSE.txt
104
+ - README.md
105
+ - Rakefile
106
+ - cut.gemspec
107
+ - lib/cut.rb
108
+ - lib/cut/class_methods.rb
109
+ - lib/cut/client.rb
110
+ - lib/cut/instance_methods.rb
111
+ - lib/cut/mapping.rb
112
+ - lib/cut/node.rb
113
+ - lib/cut/response.rb
114
+ - lib/cut/version.rb
115
+ homepage: http://github.com/rclosner/cut
116
+ licenses:
117
+ - MIT
118
+ post_install_message:
119
+ rdoc_options: []
120
+ require_paths:
121
+ - lib
122
+ required_ruby_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 1.8.23
137
+ signing_key:
138
+ specification_version: 3
139
+ summary: A Scraping DSL
140
+ test_files: []
141
+ has_rdoc: