husc 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 18698eb5d732d0d86377f031fd3dda73ff368c7ad4b10f9c7a5a45e08f325a98
4
- data.tar.gz: 9907d28762adb13ac57ae7da4a530f23ce57794f71fb2a0c502a54a53d8b73f7
3
+ metadata.gz: 70da435ce2b15bb485ce958997a91488cf19f00b7faf5da3f25c92a891028508
4
+ data.tar.gz: b363fa06b547c1a5612889739465af7ea46cbeea6b6922aab0d484fa21169ce6
5
5
  SHA512:
6
- metadata.gz: 698671388fa2b6da20b2af46e24d86eaad31f6d06a28633d83795eaf9b33473ad65c36a75791a7434414a9f64f895409b1097a786b9c79cb2e4669bb6d73140a
7
- data.tar.gz: e856cb96440b8faa60f159becc6cdf26418462c57add0d46ffce5fb40f4d6193689fb05eff0867de144f5e175df60e398a1238b75bc1cf097eed21aa7ad6a875
6
+ metadata.gz: 2190d0269954730626eca1d142c8ffd00614bf98fb19d7ba4d595668e944d769636f26c151211a664ba775b2d9f1c3b5594bbd7d5cde05eec7af31e11f82dfff
7
+ data.tar.gz: 5f0ea49baf5d2fbf6a707f3b580e95364686c2a3e1439d8c3469c090335790ebeef062176e3d67e3afd7592b23e2256f92f6421a14e4b208c94b4e5a1e737a7f
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
- Crawler
1
+ Husc
2
2
  =======
3
3
 
4
- Script for crawling in Ruby
4
+ A simple crawling utility for Ruby.
5
5
 
6
6
 
7
7
  ## Description
@@ -16,29 +16,74 @@ This project enables site crawling and data extraction with xpath and css select
16
16
  ## Usage
17
17
  ### Simple Example
18
18
  ```ruby
19
- require './rbcrawl.rb'
19
+ require 'husc'
20
20
 
21
21
  url = 'http://www.example.com/'
22
- doc = RbCrawl.new(url)
22
+ doc = Husc(url)
23
23
 
24
- # Search for nodes by css
24
+ # access another url
25
+ doc.get('another url')
26
+
27
+ # get current url
28
+ doc.url
29
+
30
+ # get current site's html
31
+ doc.html
32
+
33
+ # get <table> tags as dict
34
+ doc.tables
35
+ # ex) doc.tables['予約・お問い合わせ'] => 050-5596-6465
36
+ ```
37
+
38
+ ### Scraping Example
39
+ ```ruby
40
+ # search for nodes by css selector
41
+ # tag : css('name')
42
+ # class : css('.name')
43
+ # id : css('#name')
25
44
  doc.css('div')
26
45
  doc.css('.main-text')
27
46
  doc.css('#tadjs')
28
47
 
29
- # Search for nodes by xpath
48
+ # search for nodes by xpath
30
49
  doc.xpath('//*[@id="top"]/div[1]')
31
50
 
32
- # Others
33
- doc.css('div').css('a')[2].attr('href')
34
- doc.css('p').innerText()
35
- doc.tables # -> Table Tag to Dict
36
-
51
+ # other example
52
+ doc.css('div').css('a')[2].attr('href') # => string object
53
+ doc.css('p').innerText() # => string object
37
54
  # You do not need to specify "[]" to access the first index
38
55
  ```
39
56
 
57
+ ### Submitting Form Example
58
+ 1. Specify target node's attribute
59
+ 2. Specify value(int or str) / check(bool) / file_name(str)
60
+ 3. Call submit() with the form's attribute specified
61
+ ```ruby
62
+ # login
63
+ doc.send(id:'id attribute', value:'value to send')
64
+ doc.send(id:'id attribute', value:'value to send')
65
+ doc.submit(id:'id attribute') # submit
66
+
67
+ # post file
68
+ doc.send(id:'id attribute', file_name:'target file name')
69
+
70
+ # checkbox
71
+ doc.send(id:'id attribute', check:true) # check
72
+ doc.send(id:'id attribute', check:false) # uncheck
73
+
74
+ # example of specifying other attributes
75
+ doc.send(name:'name attribute', value:'hello')
76
+ doc.send(class:'class attribute', value:100)
77
+ ```
78
+
79
+
80
+
40
81
 
41
82
  ## Installation
42
83
  ```sh
43
84
  $ gem install husc
44
- ```
85
+ ```
86
+
87
+
88
+ ## Contributing
89
+ Bug reports and pull requests are welcome on GitHub at [https://github.com/AjxLab/husc](https://github.com/AjxLab/husc).
data/husc.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
13
13
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
14
 
15
15
  spec.metadata["homepage_uri"] = spec.homepage
16
- spec.metadata["source_code_uri"] = "https://github.com/AjxLab/Crawler."
16
+ spec.metadata["source_code_uri"] = "https://github.com/AjxLab/husc"
17
17
 
18
18
  # Specify which files should be added to the gem when it is released.
19
19
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
data/lib/husc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Husc
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
data/lib/husc.rb CHANGED
@@ -3,11 +3,12 @@ require 'mechanize'
3
3
  require 'nokogiri'
4
4
  require 'net/http'
5
5
  require 'kconv'
6
- require "husc/version"
6
+ require 'husc/version'
7
7
 
8
8
  module Husc
9
9
  class Error < StandardError; end
10
- class Husc
10
+
11
+ class Crawler
11
12
  attr_reader :url, :html, :tables, :params
12
13
 
13
14
  # 特殊配列
@@ -28,7 +29,7 @@ module Husc
28
29
 
29
30
  def method_missing(method, *args)
30
31
  if self == []
31
- return eval("Husc.new(doc: nil).#{method}(*#{args})")
32
+ return eval("Crawler.new(doc: nil).#{method}(*#{args})")
32
33
  end
33
34
 
34
35
  return eval("self[0].#{method}(*#{args})")
@@ -118,7 +119,7 @@ module Husc
118
119
 
119
120
  def xpath(locator, single = false)
120
121
  ## -----*----- HTMLからXPath指定で要素取得 -----*----- ##
121
- elements = CrawlArray.new(@doc.xpath(locator).map {|el| Husc.new(doc: el)})
122
+ elements = CrawlArray.new(@doc.xpath(locator).map {|el| Crawler.new(doc: el)})
122
123
  if single
123
124
  # シングルノード
124
125
  if elements[0] == nil
@@ -134,7 +135,7 @@ module Husc
134
135
 
135
136
  def css(locator, single = false)
136
137
  ## -----*----- HTMLからCSSセレクタで要素取得 -----*----- ##
137
- elements = CrawlArray.new(@doc.css(locator).map {|el| Husc.new(doc: el)})
138
+ elements = CrawlArray.new(@doc.css(locator).map {|el| Crawler.new(doc: el)})
138
139
  if single
139
140
  # シングルノード
140
141
  if elements[0] == nil
@@ -211,3 +212,4 @@ module Husc
211
212
  end
212
213
  end
213
214
  end
215
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: husc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tatsuya Abe
@@ -1077,7 +1077,7 @@ licenses:
1077
1077
  - MIT
1078
1078
  metadata:
1079
1079
  homepage_uri: https://github.com/AjxLab/husc
1080
- source_code_uri: https://github.com/AjxLab/Crawler.
1080
+ source_code_uri: https://github.com/AjxLab/husc
1081
1081
  post_install_message:
1082
1082
  rdoc_options: []
1083
1083
  require_paths: