web_reptile 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 889d004908345db250a70bb98af7d316c5a78acb
4
+ data.tar.gz: 8ca6a65a23367a6e08b3b4311a690bdfecacbb09
5
+ SHA512:
6
+ metadata.gz: 1d02c6e675541da068296120e4a0525de4bcbff6e93880d2aeefb615fd791f7df9e6ba7d3e59b3a4d5f7f1e8c315831b00a55a375c3220aa8b07bdbb5d49d7b6
7
+ data.tar.gz: 47bac14d750fae6712366afb419313214513ec368674836cadd255718b1c5837f8f2552ab0353a6d0d72fcc54a3bb3b181d3676ea943982c872e91c5f0a33c24
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.3.0
4
+ before_install: gem install bundler -v 1.11.2
5
+ script: bundle exec rspec spec
6
+ deploy:
7
+ provider: rubygems
8
+ api_key: "0a83ba9104db0bc50ca2b67e3fc67406"
9
+ on:
10
+ tags: true
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in web_reptile.gemspec
4
+ gemspec
5
+
6
+ gem "mechanize"
@@ -0,0 +1,48 @@
1
+ # WebReptile
2
+
3
+ [![Travis](https://img.shields.io/travis/DotHide/reptile.svg?maxAge=2592000)](https://travis-ci.org/DotHide/reptile)
4
+
5
+ WebReptile is a web spider framework written in Ruby.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'web_reptile'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install web_reptile
22
+
23
+ ## Usage
24
+
25
+ ```ruby
26
+ require('web_reptile')
27
+
28
+ domain = WebReptile.url("http://www.jd.com/allSort.aspx")
29
+ items = domain.grab(".category-items .category-item .items dt a")
30
+ items.map do |item|
31
+ href = item.attr('href')
32
+ text = item.text
33
+ "[#{text}](#{href})"
34
+ end
35
+
36
+ # ["[电子书刊](//e.jd.com/ebook.html)", "[音像](//mvd.jd.com/)", "[英文原版](//channel.jd.com/1713-4855.html)", "[文艺](//channel.jd.com/p_wenxuezongheguan.html)", "[少儿](//book.jd.com/children.html)", "[人文社科](//book.jd.com/library/socialscience.html)", "[经管励志](//channel.jd.com/p_Comprehensive.html)", ...]
37
+ ```
38
+
39
+ ## Development
40
+
41
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
42
+
43
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
44
+
45
+ ## Contributing
46
+
47
+ Bug reports and pull requests are welcome on GitHub at https://github.com/DotHide/web_reptile.
48
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Interactive console for experimenting with the gem: sets up Bundler's load
# path, loads the library, and then starts an IRB session.

require "bundler/setup"
# The library's entry file is lib/web_reptile.rb, so the feature name to
# require is "web_reptile" (requiring "reptile" raises LoadError).
require "web_reptile"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,30 @@
1
+ require "web_reptile/version"
2
+ require "mechanize"
3
+
4
# Top-level namespace of the web_reptile gem.
module WebReptile
  # Convenience entry point; delegates to {Core.url}.
  #
  # @param url [String, URI] address of the page to fetch
  # @param options [Hash] stored on the created {Core}; not otherwise read
  #   by the code in this file
  # @yieldparam web [Core] the freshly created instance (only if a block is given)
  # @return [Core] the instance wrapping the fetched page
  def self.url(url, options = {}, &block)
    Core.url(url, options, &block)
  end

  # Fetches a single page with Mechanize and lets callers query it.
  class Core
    # Fetches +url+ immediately; any error raised by Mechanize propagates.
    #
    # @param url [String, URI] address to fetch; strings are converted with +URI()+
    # @param opts [Hash] kept in +@opts+ for later use
    def initialize(url, opts = {})
      @url = url.is_a?(URI) ? url : URI(url)
      @opts = opts
      @page = Mechanize.new.get(@url)
    end

    # Builds an instance, optionally yields it, and returns it.
    #
    # Previously the block was handed to +new+, whose +initialize+ never
    # yields, so a caller's block was silently ignored. The instance is now
    # yielded explicitly; the return value (the new instance) is unchanged.
    #
    # @param url [String, URI] address of the page to fetch
    # @param opts [Hash] forwarded to {#initialize}
    # @yieldparam web [Core] the new instance
    # @return [Core] the new instance
    def self.url(url, opts = {})
      web = new(url, opts)
      yield web if block_given?
      web
    end

    # Runs +selector+ against the fetched page.
    #
    # @param selector [String] selector passed straight to the page's +search+
    # @return [Object] whatever +@page.search+ returns (presumably a Nokogiri
    #   node set for an HTML page -- confirm against Mechanize)
    def grab(selector)
      @page.search(selector)
    end
  end
end
@@ -0,0 +1,3 @@
1
module WebReptile
  # Version of the gem; read by the gemspec. Frozen so the constant's string
  # cannot be mutated in place at runtime.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for web_reptile.

# Put lib/ on the load path so the version file can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'web_reptile/version'

Gem::Specification.new do |spec|
  spec.name          = "web_reptile"
  spec.version       = WebReptile::VERSION
  spec.authors       = ["Martin_nett"]
  spec.email         = ["dothide@gmail.com"]

  spec.summary       = %q{WebReptile is a web spider framework using Ruby.}
  spec.homepage      = "https://github.com/DotHide/reptile"

  # Package every git-tracked file except tests/specs/features.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # lib/web_reptile.rb requires mechanize at load time, so it has to be a
  # runtime dependency of the gem; listing it only in the Gemfile leaves
  # installed copies unable to load. Left unconstrained to mirror the
  # Gemfile entry.
  spec.add_runtime_dependency "mechanize"

  spec.add_development_dependency "bundler", "~> 1.11"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: web_reptile
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Martin_nett
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-06-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description:
56
+ email:
57
+ - dothide@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".travis.yml"
65
+ - Gemfile
66
+ - README.md
67
+ - Rakefile
68
+ - bin/console
69
+ - bin/setup
70
+ - lib/web_reptile.rb
71
+ - lib/web_reptile/version.rb
72
+ - web_reptile.gemspec
73
+ homepage: https://github.com/DotHide/reptile
74
+ licenses: []
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.6.4
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: WebReptile is a web spider framework using Ruby.
96
+ test_files: []