web_scraping_hw3 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -4
- data/README.md +4 -1
- data/lib/web_scraping_hw3.rb +46 -21
- data/lib/web_scraping_hw3/version.rb +1 -1
- data/web_scraping_hw3.gemspec +8 -0
- metadata +44 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8ee22802b8989c0ef31b5023dc5241567fc06737aed4e90ca17ad194d07a6ae4
|
4
|
+
data.tar.gz: 12d6cb27f8e2dfaa3276887117901866230cfb0207e40eadfb45501e371243d4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f9fc1f53c42d6b5dd32c6fa1b1e99bbb1e75fa61d727266ae2f4ae0cb9333d1302df40aeba614ab968e3b3376cf3c7590d320d2809d105255347176e7b418a29
|
7
|
+
data.tar.gz: 3dc8f234fa68c013c9fc627afb03bff006a56fc0f676f9f35948d38a665c01af636a6a0ca97435c4983255097f26404635305155a0826786430b872ac0edb571
|
data/CHANGELOG.md
CHANGED
@@ -5,12 +5,18 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres
|
6
6
|
to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
-
## [0.1.0] - 2021-02-04
|
8
|
+
## [Unreleased]
|
9
|
+
|
10
|
+
## [0.1.2] - 2021-02-05
|
9
11
|
|
10
12
|
### Added
|
11
13
|
|
12
|
-
-
|
13
|
-
-
|
14
|
+
- Make the Web scraping in function
|
15
|
+
- The gem can now be run; read more in the [Readme Usage](https://github.com/james31366/Web-Scraping-HW3-SSD#usage) section.
|
16
|
+
|
17
|
+
### Changed
|
18
|
+
|
19
|
+
- Sort CHANGELOG.md so that newer versions come first.
|
14
20
|
|
15
21
|
## [0.1.1] - 2021-02-04
|
16
22
|
|
@@ -18,4 +24,12 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
18
24
|
|
19
25
|
- Changed the date in CHANGELOG.md to published date.
|
20
26
|
- Add some information of the gem description.
|
21
|
-
- Add surname in author.
|
27
|
+
- Add surname in author.
|
28
|
+
|
29
|
+
## [0.1.0] - 2021-02-04
|
30
|
+
|
31
|
+
### Added
|
32
|
+
|
33
|
+
- Add scraping for the full list of corporations
|
34
|
+
- Scrape only the assets of each corporation
|
35
|
+
|
data/README.md
CHANGED
data/lib/web_scraping_hw3.rb
CHANGED
@@ -13,31 +13,56 @@ module WebScrapingHw3
|
|
13
13
|
|
14
14
|
BASE_URL = "https://www.set.or.th"
|
15
15
|
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
# Class to Scrape the set website
|
17
|
+
class Scrape
|
18
|
+
# Main function to scrape website.
|
19
|
+
# and print the corporation name and asset cost.
|
20
|
+
# @return [nil]
|
21
|
+
def self.main_scrape
|
22
|
+
url = "#{BASE_URL}/set/commonslookup.do"
|
23
|
+
companies_pages = self.find_companies_page(url)
|
24
|
+
|
25
|
+
companies_pages.each do |companies_page|
|
26
|
+
companies_table = self.find_companies_table(companies_page)
|
27
|
+
|
28
|
+
companies_table.each do |company_tag_a|
|
29
|
+
url_stock_highlight_page = self.find_company_highlight(company_tag_a)
|
30
|
+
stock_url = "#{BASE_URL}#{url_stock_highlight_page}"
|
31
|
+
self.print_asset(stock_url)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
24
37
|
|
25
|
-
|
26
|
-
|
27
|
-
|
38
|
+
def self.find_companies_page(url)
|
39
|
+
parsed_page = make_parsed(url)
|
40
|
+
parsed_page.css("div.col-xs-12.padding-top-10.text-center.capital-letter").css("a")
|
41
|
+
end
|
28
42
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
43
|
+
def self.find_companies_table(companies_page)
|
44
|
+
companies_url = "#{BASE_URL}#{companies_page.attributes["href"].value}"
|
45
|
+
companies_parsed_page = make_parsed(companies_url)
|
46
|
+
companies_parsed_page.css("table.table-profile.table-hover.table-set-border-yellow").css("a")
|
47
|
+
end
|
33
48
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
49
|
+
# Convert any url link from string to nokogiri html.
|
50
|
+
# To use with any nokogiri tools.
|
51
|
+
# @param [String] url
|
52
|
+
# @return [Nokogiri::HTML::Document]
|
53
|
+
def self.make_parsed(url)
|
54
|
+
unparsed_page = HTTParty.get(url)
|
55
|
+
Nokogiri::HTML(unparsed_page.body)
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.find_company_highlight(company_tag_a)
|
59
|
+
stock_profile_page_url = "#{BASE_URL}#{company_tag_a.attributes["href"].value}"
|
60
|
+
stock_profile_page = make_parsed(stock_profile_page_url)
|
61
|
+
stock_profile_page.css("ul.nav.nav-tabs.set-nav-tabs")
|
62
|
+
.css("a")[1].attributes["href"].value
|
63
|
+
end
|
39
64
|
|
40
|
-
|
65
|
+
def self.print_asset(stock_url)
|
41
66
|
stock_parsed_page = make_parsed(stock_url)
|
42
67
|
|
43
68
|
name = stock_parsed_page.css("div.col-xs-12.col-md-12.col-lg-8").css("h3").text
|
data/web_scraping_hw3.gemspec
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
# Vichisorn Wejsupakul
|
4
|
+
# 6210546013
|
5
|
+
|
3
6
|
require_relative "lib/web_scraping_hw3/version"
|
4
7
|
|
5
8
|
Gem::Specification.new do |spec|
|
@@ -32,6 +35,11 @@ Gem::Specification.new do |spec|
|
|
32
35
|
spec.add_dependency "httparty", "~> 0.18.1"
|
33
36
|
spec.add_dependency "nokogiri", "~> 1.11.1"
|
34
37
|
|
38
|
+
# Development dependency gem
|
39
|
+
spec.add_development_dependency "bundler", "~> 1.17"
|
40
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
41
|
+
spec.add_development_dependency "minitest", "~> 5.0"
|
42
|
+
|
35
43
|
# For more information and examples about making a new gem, checkout our
|
36
44
|
# guide at: https://bundler.io/guides/creating_gem.html
|
37
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_scraping_hw3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vichisorn Wejsupakul
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -38,6 +38,48 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.11.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.17'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.17'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: minitest
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '5.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '5.0'
|
41
83
|
description: This gem is web scraping from assignment 2 in Soft spec design lab.This
|
42
84
|
gem is created by 6210546013 Vichisorn Wejsupakul
|
43
85
|
email:
|