gcrawler 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c1b8fc3cdd83389fd819bb922e5a40c5b6d0e31efad64d44643e686dacb6903
4
- data.tar.gz: a333883b2912929e8b19d46f06e1ad3ee2cf7d455b9925cc8cce22ce6d7a30dd
3
+ metadata.gz: 0e95b1de0b3e42ef12b8b757dc7298bddc167cd7ffc2b1dd9ad50b8bf39480e9
4
+ data.tar.gz: edc2c49da4450842be6f347cf407d90cc3e3b69f00473f58352deb1361f273bb
5
5
  SHA512:
6
- metadata.gz: 45a235679f7d963eee03ddb87d8cb76bc8fd81218016443c57b95b2aae737678095eec3d6b1a1c48b281f2b29339a4048dba3c8cc702b13afeb9bb6a750befb4
7
- data.tar.gz: 3fee1f2b977bc448bee35d96736017b171f7ad4ffd4595e2e1fa488e33d914bf50341206c885363690e5be3a454f5c56207298877faac89436e9e0f0a5cfc5d7
6
+ metadata.gz: 0c6551e3e9082d57fb8ed474dd6c5ce0ca26daaa289c041f622a5acae457673b885bfa15d5d6cf81b5e1c58176ac886226c3ff1dfcc03c6877a57a557ae7023e
7
+ data.tar.gz: 52bd81434ae9974d9eb70f09a36f5716cf92f6d05c7ea5e70f71d7503d5f0ae37f84a38b9f3a44d29a0052bad5b344699f8c41ef0840ed808b50acb97fbe1f58
@@ -0,0 +1,38 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Ruby
9
+
10
+ on:
11
+ push:
12
+ branches: [ "master" ]
13
+ pull_request:
14
+ branches: [ "master" ]
15
+
16
+ permissions:
17
+ contents: read
18
+
19
+ jobs:
20
+ test:
21
+
22
+ runs-on: ubuntu-latest
23
+ strategy:
24
+ matrix:
25
+ ruby-version: ['2.7', '3.0']
26
+
27
+ steps:
28
+ - uses: actions/checkout@v3
29
+ - name: Set up Ruby
30
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
31
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
32
+ # uses: ruby/setup-ruby@v1
33
+ uses: ruby/setup-ruby@0a29871fe2b0200a17a4497bae54fe5df0d973aa # v1.115.3
34
+ with:
35
+ ruby-version: ${{ matrix.ruby-version }}
36
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
37
+ - name: Run tests
38
+ run: COVERALLS_REPO_TOKEN=${{ secrets.REPO_TOKEN }} bundle exec rake
data/.gitignore CHANGED
@@ -9,3 +9,4 @@
9
9
 
10
10
  # rspec failure tracking
11
11
  .rspec_status
12
+ .coveralls.yml
data/CHANGELOG.md ADDED
@@ -0,0 +1,4 @@
1
+ ### version 0.1.1 (2022-09-24)
2
+
3
+ * Add logger class for stdout
4
+ * Add test specs
data/Gemfile CHANGED
@@ -1,7 +1,10 @@
1
- source "https://rubygems.org"
1
+ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in gcrawler.gemspec
4
4
  gemspec
5
5
 
6
- gem "rake", "~> 12.0"
7
- gem "rspec", "~> 3.0"
6
+ gem 'coveralls_reborn', require: false
7
+ gem 'rake', '~> 12.0'
8
+ gem 'rspec', '~> 3.0'
9
+
10
+ gem 'wombat'
data/Gemfile.lock CHANGED
@@ -1,13 +1,65 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- gcrawler (0.1.0)
4
+ gcrawler (0.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ activesupport (7.0.4)
10
+ concurrent-ruby (~> 1.0, >= 1.0.2)
11
+ i18n (>= 1.6, < 2)
12
+ minitest (>= 5.1)
13
+ tzinfo (~> 2.0)
14
+ addressable (2.8.1)
15
+ public_suffix (>= 2.0.2, < 6.0)
16
+ concurrent-ruby (1.1.10)
17
+ connection_pool (2.3.0)
18
+ coveralls_reborn (0.25.0)
19
+ simplecov (>= 0.18.1, < 0.22.0)
20
+ term-ansicolor (~> 1.6)
21
+ thor (>= 0.20.3, < 2.0)
22
+ tins (~> 1.16)
9
23
  diff-lcs (1.5.0)
24
+ docile (1.4.0)
25
+ domain_name (0.5.20190701)
26
+ unf (>= 0.0.5, < 1.0.0)
27
+ http-accept (1.7.0)
28
+ http-cookie (1.0.5)
29
+ domain_name (~> 0.5)
30
+ i18n (1.12.0)
31
+ concurrent-ruby (~> 1.0)
32
+ mechanize (2.8.5)
33
+ addressable (~> 2.8)
34
+ domain_name (~> 0.5, >= 0.5.20190701)
35
+ http-cookie (~> 1.0, >= 1.0.3)
36
+ mime-types (~> 3.0)
37
+ net-http-digest_auth (~> 1.4, >= 1.4.1)
38
+ net-http-persistent (>= 2.5.2, < 5.0.dev)
39
+ nokogiri (~> 1.11, >= 1.11.2)
40
+ rubyntlm (~> 0.6, >= 0.6.3)
41
+ webrick (~> 1.7)
42
+ webrobots (~> 0.1.2)
43
+ mime-types (3.4.1)
44
+ mime-types-data (~> 3.2015)
45
+ mime-types-data (3.2022.0105)
46
+ mini_portile2 (2.8.0)
47
+ minitest (5.16.3)
48
+ net-http-digest_auth (1.4.1)
49
+ net-http-persistent (4.0.1)
50
+ connection_pool (~> 2.2)
51
+ netrc (0.11.0)
52
+ nokogiri (1.13.8)
53
+ mini_portile2 (~> 2.8.0)
54
+ racc (~> 1.4)
55
+ public_suffix (5.0.0)
56
+ racc (1.6.0)
10
57
  rake (12.3.3)
58
+ rest-client (2.1.0)
59
+ http-accept (>= 1.7.0, < 2.0)
60
+ http-cookie (>= 1.0.2, < 2.0)
61
+ mime-types (>= 1.16, < 4.0)
62
+ netrc (~> 0.8)
11
63
  rspec (3.11.0)
12
64
  rspec-core (~> 3.11.0)
13
65
  rspec-expectations (~> 3.11.0)
@@ -21,14 +73,40 @@ GEM
21
73
  diff-lcs (>= 1.2.0, < 2.0)
22
74
  rspec-support (~> 3.11.0)
23
75
  rspec-support (3.11.1)
76
+ rubyntlm (0.6.3)
77
+ simplecov (0.21.2)
78
+ docile (~> 1.1)
79
+ simplecov-html (~> 0.11)
80
+ simplecov_json_formatter (~> 0.1)
81
+ simplecov-html (0.12.3)
82
+ simplecov_json_formatter (0.1.4)
83
+ sync (0.5.0)
84
+ term-ansicolor (1.7.1)
85
+ tins (~> 1.0)
86
+ thor (1.2.1)
87
+ tins (1.31.1)
88
+ sync
89
+ tzinfo (2.0.5)
90
+ concurrent-ruby (~> 1.0)
91
+ unf (0.1.4)
92
+ unf_ext
93
+ unf_ext (0.0.8.2)
94
+ webrick (1.7.0)
95
+ webrobots (0.1.2)
96
+ wombat (3.0.0)
97
+ activesupport
98
+ mechanize (~> 2.8.5)
99
+ rest-client
24
100
 
25
101
  PLATFORMS
26
102
  ruby
27
103
 
28
104
  DEPENDENCIES
105
+ coveralls_reborn
29
106
  gcrawler!
30
107
  rake (~> 12.0)
31
108
  rspec (~> 3.0)
109
+ wombat
32
110
 
33
111
  BUNDLED WITH
34
112
  2.1.4
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # Gcrawler
2
2
 
3
- Google search crawler for Ruby version.
3
+ [![Gem Version](https://badge.fury.io/rb/gcrawler.svg)](https://badge.fury.io/rb/gcrawler)
4
+ [![Coverage Status](https://coveralls.io/repos/github/rogerluo410/gcrawler/badge.svg?branch=master)](https://coveralls.io/github/rogerluo410/gcrawler?branch=master)
5
+
6
+ Google search crawler for Ruby. Crawls each link's text and URL by keywords on Google.com.
4
7
 
5
8
  ## Installation
6
9
 
@@ -43,6 +46,9 @@ Or install it yourself as:
43
46
  # Output: ['url1', 'url2', ...]
44
47
  pp google_crawler.search_as_url('お肉とチーズの専門店', 'ミートダルマ札幌店', country: 'ja')
45
48
 
49
+ # Get the second page:
50
+ pp google_crawler.search_as_url('お肉とチーズの専門店', 'ミートダルマ札幌店', country: 'ja', start: 10)
51
+
46
52
  ```
47
53
 
48
54
  Function args definition:
@@ -6,8 +6,11 @@
6
6
 
7
7
  require 'wombat'
8
8
  require 'uri'
9
+ require 'logger'
9
10
  require_relative './utils'
10
11
 
12
+ LOGGER = Logger.new(STDOUT)
13
+
11
14
  # Crawl action
12
15
  class Crawler
13
16
  include Wombat::Crawler
@@ -27,7 +30,7 @@ class Crawler
27
30
  mechanize.set_proxy(*proxy) if proxy.length == 2
28
31
  mechanize.user_agent = user_agent
29
32
 
30
- pp "proxy: #{proxy}, user_agent: #{user_agent}"
33
+ LOGGER.info "proxy: #{proxy}, user_agent: #{user_agent}"
31
34
  end
32
35
  end
33
36
 
@@ -80,14 +83,14 @@ class GoogleCrawler
80
83
  @crawler.query_str(query_str)
81
84
 
82
85
  seconds = Utils.random_interval_time
83
- pp "Crawling query string is #{query_str}, will be crawling after #{seconds} seconds..."
86
+ LOGGER.info "Crawling query string is #{query_str}, will be crawling after #{seconds} seconds..."
84
87
  sleep(seconds)
85
88
 
86
89
  @crawler.crawl
87
90
 
88
91
  raise "Fetch on Google failed with code #{@crawler.response_code}" unless @crawler.response_code == 200
89
92
 
90
- pp 'Crawl on Google successfully...'
93
+ LOGGER.info 'Crawl on Google successfully...'
91
94
  end
92
95
 
93
96
  private
@@ -1,3 +1,3 @@
1
1
  module Gcrawler
2
- VERSION = "0.1.0"
2
+ VERSION = '0.1.1'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gcrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - rogerluo410
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-09-23 00:00:00.000000000 Z
11
+ date: 2022-09-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Crawling link text and link url by keywords on Google.com.
14
14
  email:
@@ -17,9 +17,11 @@ executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
+ - ".github/workflows/ruby.yml"
20
21
  - ".gitignore"
21
22
  - ".rspec"
22
23
  - ".travis.yml"
24
+ - CHANGELOG.md
23
25
  - CODE_OF_CONDUCT.md
24
26
  - Gemfile
25
27
  - Gemfile.lock