arb-crawler 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1edbf13a9ded81eb25cf49a3d8f42abf38d86f0f
4
+ data.tar.gz: fef12e014d16904d3b5c7b99bdbfc30125985b22
5
+ SHA512:
6
+ metadata.gz: 6938b1e9a8b5270dd7068e9d5c541548905c9ce63259d3b32b80b0a924da79e804c03f155915657d1f8d83eb0ecb25c11a9179397cf6fe53361e89dbc57b7def
7
+ data.tar.gz: f603f483c106a317df770b49bd14ca6d08163ae73208cfb82b23aa58798a6f9647dc05b2ff257a3b4550f2689e11d34b0b695fa028b5fd08dc306bc581bfd428
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in arb-crawler.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # Arb::Crawler
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/arb/crawler.rb`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'arb-crawler'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install arb-crawler
22
+
23
+ ## Usage
24
+
25
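+ After `require 'arb/crawler'`, call `Arb::Crawler.get_by_css(url, selector)` (or the `post`/`put`/`delete` and `xpath` variants) to fetch a page and get an array of attribute hashes for the matching elements, each of which also responds to `text`; use `Arb::Crawler.download(url, file)` to save the body of a URL to a local file.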
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/arybin-cn/arb-crawler.
36
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
# Rake entry point for the gem; pulls in Bundler's gem packaging tasks.
require "bundler/gem_tasks"

# The gem defines no :spec task (there is no test suite or test-framework
# dependency), so pointing the default task at :spec makes a bare `rake`
# abort. Default to the :build task supplied by bundler/gem_tasks instead.
task :default => :build
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for arb-crawler.

# Put lib/ on the load path so the version file can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'arb/crawler/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name    = 'arb-crawler'
  gem.version = Arb::Crawler::VERSION
  gem.authors = ['arybin']
  gem.email   = ['arybin@163.com']

  # Description
  gem.summary     = 'Web page crawler.'
  gem.description = 'Web page crawler.'
  gem.homepage    = 'https://github.com/arybin-cn/arb-crawler'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are whatever tracked files live under exe/.
  gem.bindir        = 'exe'
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ['lib']

  # Development-only tooling.
  gem.add_development_dependency 'bundler', '~> 1.14'
  gem.add_development_dependency 'rake', '~> 10.0'

  # Runtime dependencies (no version constraints).
  gem.add_dependency 'arb-str'
  gem.add_dependency 'nokogiri'
  gem.add_dependency 'httpclient'
end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "arb/crawler"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,5 @@
1
module Arb
  module Crawler
    # Release version of the gem. Frozen so the shared constant string cannot
    # be mutated in place by code that reads it.
    VERSION = "1.0.0".freeze
  end
end
@@ -0,0 +1,67 @@
1
+ require "arb/crawler/version"
2
+
3
+ require 'nokogiri'
4
+ require 'httpclient'
5
+ require 'arb/str'
6
+
7
module Arb
  # Module-level helpers that fetch pages over HTTP with HTTPClient and query
  # the response body with Nokogiri CSS / XPath selectors.
  #
  # For every HTTP verb in delete/get/post/put and every selector kind in
  # css/xpath two methods are generated, e.g. +get_by_css_raw+ and
  # +get_by_css+.
  module Crawler
    class << self
      http_methods   = %w[delete get post put]
      selector_kinds = %w[css xpath]

      # The HTTPClient instance shared by all helpers of this module.
      # It is created on first use instead of when the file is loaded.
      #
      # @return [HTTPClient]
      def default_client
        @default_client ||= HTTPClient.new
      end

      # Returns the part of +url+ that follows its last '/'.
      #
      # A URL without any '/' is returned whole (previously this raised
      # NoMethodError); a URL ending in '/' yields an empty string.
      #
      # @param url [String, nil]
      # @return [String, nil] nil when +url+ is nil
      def filename_of_url(url)
        url && url.rpartition('/').last
      end

      # Fetches +url+ with a GET request and writes the response body to
      # +file+ in binary mode.
      #
      # The body is fetched before the file is opened, so a failed request
      # does not truncate an existing file. Errors are reported on $stderr.
      # NOTE(review): the response status is not inspected, so the body of an
      # error response is written like any other body.
      #
      # @param url [String] address to fetch
      # @param file [String] path of the file to (over)write
      # @return [Boolean] true on success, false when a StandardError occurred
      def download(url, file)
        body = default_client.get(url).body
        File.open(file, 'wb') { |io| io << body }
        true
      rescue StandardError => e
        $stderr.puts e
        false
      end

      http_methods.each do |method|
        selector_kinds.each do |way|
          # <verb>_by_<css|xpath>_raw(url, css_or_xpath) { |node| ... }
          #
          # Requests +url+ with the HTTP verb, parses the response body with
          # Nokogiri and applies the selector. Each matched node is passed to
          # the optional block. Returns the selector result itself.
          define_method "#{method}_by_#{way}_raw" do |url, css_or_xpath, &blk|
            document = ::Nokogiri.parse(default_client.public_send(method, url).body)
            document.public_send(way, css_or_xpath).tap do |nodes|
              nodes.each { |node| blk.call(node) } if blk
            end
          end

          # <verb>_by_<css|xpath>(url, css_or_xpath) { |hash| ... }
          #
          # Same request as the _raw variant, but every matched node is
          # converted to a Hash of its attributes (symbol keys, string
          # values) that additionally responds to +text+ with the node text.
          # Each hash is passed to the optional block; the block used to
          # reference an undefined variable and raised NameError.
          # Returns an Array of those hashes.
          define_method "#{method}_by_#{way}" do |url, css_or_xpath, &blk|
            public_send("#{method}_by_#{way}_raw", url, css_or_xpath).map do |node|
              entry = {}
              node.attributes.each do |name, attribute|
                entry[name.to_sym] = attribute.value
              end
              entry.define_singleton_method(:text) { node.text }
              blk.call(entry) if blk
              entry
            end
          end
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: arb-crawler
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - arybin
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-02-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.14'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.14'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: arb-str
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: httpclient
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Web page crawler.
84
+ email:
85
+ - arybin@163.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - Gemfile
92
+ - README.md
93
+ - Rakefile
94
+ - arb-crawler.gemspec
95
+ - bin/console
96
+ - bin/setup
97
+ - lib/arb/crawler.rb
98
+ - lib/arb/crawler/version.rb
99
+ homepage: https://github.com/arybin-cn/arb-crawler
100
+ licenses: []
101
+ metadata: {}
102
+ post_install_message:
103
+ rdoc_options: []
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ requirements: []
117
+ rubyforge_project:
118
+ rubygems_version: 2.4.8
119
+ signing_key:
120
+ specification_version: 4
121
+ summary: Web page crawler.
122
+ test_files: []