busgogo 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/scraper.rb +79 -0
- metadata +43 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7f27a9af8c97dbdcbca745796b9f22d0e30ffc95
|
4
|
+
data.tar.gz: 49196e672d61b56d97aa485e879b947d6e6fdc1b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fedb32454d733faa21327437595f5825a7b3cbde419e1b23aa12880edaee4ae563f67b006cdc2bbe9510e9fbee9e526bdba2bb003d89c05867ac987f04c51205
|
7
|
+
data.tar.gz: 5d131f9c52dfaba7e424791f3985f7049bfcc1fe6449da22152b13c0c68cd70ea817441107dc3fa3626e0d136efda8bdbbca0e43e44f5034fd308002e9345be8
|
data/lib/scraper.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'mechanize'
|
5
|
+
# Class that can be used to grab data from the Hsinchu Bus website (http://www.hcbus.com.tw/).
|
6
|
+
class WebScraper
  # Scrapes the station dropdown ('jumpMenu') of the service page and the
  # 'map-style' table cells shown after each dropdown entry is submitted.

  # Service page whose 'jumpMenu' dropdown lists the stations.
  SERVICE_URL = 'http://www.hcbus.com.tw/big5/service.asp'
  # Divider pushed into @output before each row key and after each batch.
  SEPARATOR = '**************************************'
  # Number of dropdown entries submitted by tmp_selectstation.
  DROPDOWN_ENTRIES = 9

  # data    - document parsed by the last getwebstructure call
  # station - Hash of position (starting at 1) => dropdown option text
  # page    - page returned by the last form submission in selectdropdown
  # output  - Array of scraped cell texts and separator lines
  # url     - address of the page being scraped
  attr_accessor :data, :station, :page, :output, :url

  # Starts with every attribute unset (nil).
  def initialize
    @data = nil
    @station = nil
    @page = nil
    @output = nil
    @url = nil
  end

  # Downloads +website+ and stores the parsed HTML document in @data.
  #
  # URI.open is used instead of Kernel#open because open-uri no longer
  # handles URLs through Kernel#open on Ruby 3.0+; the block form also
  # closes the downloaded stream once parsing is done.
  #
  # @param website [String] URL to fetch
  # @return the parsed document (also stored in @data)
  def getwebstructure(website)
    @data = URI.open(website) { |web_data| Nokogiri.HTML(web_data) }
  end

  # Runs the scrape and writes the collected @output to 'Output.txt'
  # in the current working directory, then prints a hint to stdout.
  #
  # NOTE(review): @output is an Array, so the file receives its inspect
  # form; kept as in the original because the intended format is unclear.
  def file_output
    # The original called the undefined `selectstation` (NameError).
    tmp_selectstation
    File.write('Output.txt', @output)
    puts "\n\n\nPlease check data in Output.txt file"
  end

  # Points the scraper at SERVICE_URL and reads the station dropdown.
  #
  # @return [Hash] position (starting at 1) => option text
  def busstation
    @url = SERVICE_URL
    load_stations
  end

  # Selects dropdown entry +num+ in the first form of +url+, submits the
  # form, and appends the resulting 'map-style' cells to @output.
  #
  # @param url [String] page containing the 'jumpMenu' form
  # @param num [Integer] zero-based index of the option to select
  # @return [Array] @output after the cells have been appended
  def selectdropdown(url, num)
    form = Mechanize.new.get(url).forms.first
    form.field_with(name: 'jumpMenu').options[num].click
    @page = form.submit
    document = @page.parser
    # Every 'map-style' cell, in document order.
    cells = document.xpath("//table/tr/td[@class='map-style']").map { |td| td.text.strip }
    # Only the first 'map-style' cell of each row; used as section keys.
    row_keys = document.xpath("//table/tr/td[@class='map-style'][1]").map { |td| td.text.strip }
    filehash(cells, row_keys)
  end

  # Appends each entry of +value+ to @output, inserting one SEPARATOR in
  # front of any entry whose text also appears in +key+, and a closing
  # SEPARATOR at the end.
  #
  # @param value [Array<String>] cell texts to append
  # @param key [Array<String>] texts that mark the start of a section
  # @return [Array] @output
  def filehash(value, key)
    @output ||= [] # allow direct calls before tmp_selectstation ran
    value.each do |v|
      # include? emits a single separator even when key text repeats.
      @output << SEPARATOR if key.include?(v)
      @output << v
    end
    @output << SEPARATOR
  end

  # Reloads the station list, then submits the first DROPDOWN_ENTRIES
  # dropdown entries and collects their table cells.
  #
  # @return [Array] the freshly collected @output
  def tmp_selectstation
    # Fall back to the service page when no URL was set beforehand;
    # the original passed nil to getwebstructure in that case.
    @url ||= SERVICE_URL
    @output = []
    load_stations
    DROPDOWN_ENTRIES.times { |i| selectdropdown(@url, i) }
    @output
  end

  private

  # Fetches the current url and rebuilds @station from the option texts
  # of the 'jumpMenu' dropdown, numbered from 1.
  def load_stations
    @station = {}
    getwebstructure(url)
    @data.css("select[name='jumpMenu'] option").each_with_index do |option, index|
      @station[index + 1] = option.text
    end
    @station
  end
end
|
metadata
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: busgogo
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Mavis Cheng (Cheng SyunWei), Yen Wei ,Wu ChiaChun
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-10-24 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: We use the Hsinchu Bus website because we often take the Hsinchu bus. Every time we are heading somewhere, we go to their website (http://www.hcbus.com.tw/) to get information about their service stations, times, and routes. Therefore, we think it will be convenient for us to use a web scraper to get the specific information we want.
|
14
|
+
email: wei.yen.0718@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/scraper.rb
|
20
|
+
homepage: http://rubygems.org/gems/busgogo
|
21
|
+
licenses: []
|
22
|
+
metadata: {}
|
23
|
+
post_install_message:
|
24
|
+
rdoc_options: []
|
25
|
+
require_paths:
|
26
|
+
- lib
|
27
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - '>='
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: '0'
|
32
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - '>='
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
requirements: []
|
38
|
+
rubyforge_project:
|
39
|
+
rubygems_version: 2.4.2
|
40
|
+
signing_key:
|
41
|
+
specification_version: 4
|
42
|
+
summary: BusGoGo
|
43
|
+
test_files: []
|