weget 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/weget/analysis.rb +37 -35
- data/lib/weget/config.rb +33 -20
- data/lib/weget/crawler.rb +136 -134
- data/lib/weget/list.txt +46 -0
- data/lib/weget/test.rb +14 -0
- data/lib/weget/version.rb +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7106d7acdefd043ffc661dd2feb6482abf5d8c979719bed2503d4d826454bcea
|
|
4
|
+
data.tar.gz: ac4b011d5124a7f33fcf0adf85d5f3919337afa37af45779ba58e778ec310c7c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1c770ea6a697d7f604d5ba6699bcf4b83e12b49c6308f3df4d92c23be6b050d2181ae3b8ab193199d22799c21dc11bef5577536e338343631534353dd9556c6b
|
|
7
|
+
data.tar.gz: 25919a8466a65913796a73267925391ceaa0bda9f78c6a8188d3073f59437f83138674256621b1e0ec3d7b7585b4d9b7438da2d10204bf3a04761d2f958920dc
|
data/lib/weget/analysis.rb
CHANGED
|
@@ -4,10 +4,6 @@ require 'spreadsheet'
|
|
|
4
4
|
require 'nokogiri'
|
|
5
5
|
require 'json'
|
|
6
6
|
|
|
7
|
-
exit if Object.const_defined?(:Ocra)
|
|
8
|
-
|
|
9
|
-
puts "html 分析"
|
|
10
|
-
puts "======================================"
|
|
11
7
|
def get_array_from_dom(dom)
|
|
12
8
|
arr = []
|
|
13
9
|
(1..100).each do |n|
|
|
@@ -41,40 +37,46 @@ def get_array_from_dom(dom)
|
|
|
41
37
|
arr
|
|
42
38
|
end
|
|
43
39
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
40
|
+
def analysis()
|
|
41
|
+
puts "html 分析"
|
|
42
|
+
puts "======================================"
|
|
43
|
+
arr = []
|
|
44
|
+
#Dir["./out/*_carlist*.html"].each do |file|
|
|
45
|
+
files = Dir.glob('./out/*_carlist*.html').sort_by { |fn| File.birthtime(fn) }
|
|
46
|
+
files.each do |file|
|
|
47
|
+
File.open(file, "r:UTF-8") do |f|
|
|
48
|
+
puts "ファイル取り込み #{file}"
|
|
49
|
+
content = f.read
|
|
50
|
+
dom = Nokogiri::HTML(content)
|
|
51
|
+
arr.concat(get_array_from_dom(dom))
|
|
52
|
+
puts "======================================"
|
|
53
|
+
end
|
|
52
54
|
end
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
sheet =
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
# 指定したセルの文字色と背景色を指定
|
|
61
|
-
colIndex = 0
|
|
62
|
-
arr[0].keys.each do |col|
|
|
63
|
-
sheet[0, colIndex] = col
|
|
64
|
-
sheet.row(0).set_format(colIndex, format)
|
|
65
|
-
colIndex = colIndex + 1
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
rowIndex = 1
|
|
69
|
-
arr.each do |row|
|
|
55
|
+
fileName = "./out/" + Time.now.to_i.to_s + '.xls'
|
|
56
|
+
puts "EXECL出力開始"
|
|
57
|
+
book = Spreadsheet::Workbook.new
|
|
58
|
+
sheet = book.create_worksheet
|
|
59
|
+
sheet.name = 'carlist'
|
|
60
|
+
format = Spreadsheet::Format.new :color=> :white, :pattern_fg_color => :blue, :pattern => 1
|
|
61
|
+
# 指定したセルの文字色と背景色を指定
|
|
70
62
|
colIndex = 0
|
|
71
63
|
arr[0].keys.each do |col|
|
|
72
64
|
sheet[0, colIndex] = col
|
|
73
|
-
sheet
|
|
65
|
+
sheet.row(0).set_format(colIndex, format)
|
|
74
66
|
colIndex = colIndex + 1
|
|
75
67
|
end
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
68
|
+
|
|
69
|
+
rowIndex = 1
|
|
70
|
+
arr.each do |row|
|
|
71
|
+
colIndex = 0
|
|
72
|
+
arr[0].keys.each do |col|
|
|
73
|
+
sheet[0, colIndex] = col
|
|
74
|
+
sheet[rowIndex, colIndex] = row[col]
|
|
75
|
+
colIndex = colIndex + 1
|
|
76
|
+
end
|
|
77
|
+
rowIndex = rowIndex + 1;
|
|
78
|
+
end
|
|
79
|
+
book.write fileName
|
|
80
|
+
puts "EXECL出力完了 => #{fileName}"
|
|
81
|
+
puts "======================================"
|
|
82
|
+
end
|
data/lib/weget/config.rb
CHANGED
|
@@ -1,28 +1,41 @@
|
|
|
1
1
|
# coding: utf-8
|
|
2
|
+
require 'cgi'
|
|
2
3
|
|
|
3
4
|
USER_ID = "xxxxxx"
|
|
4
5
|
USER_PASS = "xxxxxx"
|
|
6
|
+
URL_BASE = "https://www.iauc.co.jp"
|
|
7
|
+
URL_SERVICE = "#{URL_BASE}/service/"
|
|
8
|
+
URL_TAB_CREATE = "#{URL_BASE}/ajax/tab/create"
|
|
9
|
+
URL_NOTICE_PRE_LOGIN = "#{URL_BASE}/ajax/service/notice_pre_login"
|
|
10
|
+
URL_REQUIRED_NOTICE = "#{URL_BASE}/ajax/service/required_notice"
|
|
11
|
+
URL_REQUIRED_NOTICE_TOP = "#{URL_BASE}/ajax/service/required_notice_top"
|
|
12
|
+
URL_LOGIN = "#{URL_BASE}/service/login"
|
|
13
|
+
URL_MARKET = "#{URL_BASE}/market/"
|
|
14
|
+
URL_LOGOUT = "#{URL_BASE}/service/logout"
|
|
15
|
+
URL_TAB_CHANGE = "#{URL_BASE}/ajax/tab/change"
|
|
5
16
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
URL_LOGOUT = 'https://www.iauc.co.jp/service/logout'
|
|
17
|
+
SITEIDS = [25,12,14,13,15,11,10,65,231,2,4,1,20,22,23,21,163,28,253,62,63,68,71,80,51,58,117,93,250,142,148,139,146,147,192,193,211,216,219,197,198,217,212,3,89,165,40,91,43,114,111,252,224,7,90,59,57,84,52,81,140,194,18,19,16,143,39,42,6,29,35,37,70,79,54,61,60,67,87,95,88,119,251,196,195,240,241,242,86,55,64,53,66,82,48,221,222,223,30,73,33,41,113,237,238,239,243,5,74,78,56,206,31,235,17,34,36,38]
|
|
18
|
+
#URL_SEARCH_MARKET = "#{URL_BASE}/ajax/search/makers?vehicleType=car&mode=market&siteId=25%2C12%2C14%2C13%2C15%2C11%2C10%2C65%2C231%2C2%2C4%2C1%2C20%2C22%2C23%2C21%2C163%2C28%2C253%2C62%2C63%2C68%2C71%2C80%2C51%2C58%2C117%2C93%2C250%2C142%2C148%2C139%2C146%2C147%2C192%2C193%2C211%2C216%2C219%2C197%2C198%2C217%2C212%2C3%2C89%2C165%2C40%2C91%2C43%2C114%2C111%2C252%2C224%2C7%2C90%2C59%2C57%2C84%2C52%2C81%2C140%2C194%2C18%2C19%2C16%2C143%2C39%2C42%2C6%2C29%2C35%2C37%2C70%2C79%2C54%2C61%2C60%2C67%2C87%2C95%2C88%2C119%2C251%2C196%2C195%2C240%2C241%2C242%2C86%2C55%2C64%2C53%2C66%2C82%2C48%2C221%2C222%2C223%2C30%2C73%2C33%2C41%2C113%2C237%2C238%2C239%2C243%2C5%2C74%2C78%2C56%2C206%2C31%2C235%2C17%2C34%2C36%2C38&_=1524808327595"
|
|
19
|
+
URL_SEARCH_MARKET = "#{URL_BASE}/ajax/search/makers?vehicleType=car&mode=market&siteId=#{CGI.escape(SITEIDS.join(","))}&_=1524808327595"
|
|
20
|
+
#URL_SEARCH_MARKET2 = "#{URL_BASE}/market/search?d=25%2C12%2C14%2C13%2C15%2C11%2C10%2C65%2C231%2C2%2C4%2C1%2C20%2C22%2C23%2C21%2C163%2C28%2C253%2C62%2C63%2C68%2C71%2C80%2C51%2C58%2C117%2C93%2C250%2C142%2C148%2C139%2C146%2C147%2C192%2C193%2C211%2C216%2C219%2C197%2C198%2C217%2C212%2C3%2C89%2C165%2C40%2C91%2C43%2C114%2C111%2C252%2C224%2C7%2C90%2C59%2C57%2C84%2C52%2C81%2C140%2C194%2C18%2C19%2C16%2C143%2C39%2C42%2C6%2C29%2C35%2C37%2C70%2C79%2C54%2C61%2C60%2C67%2C87%2C95%2C88%2C119%2C251%2C196%2C195%2C240%2C241%2C242%2C86%2C55%2C64%2C53%2C66%2C82%2C48%2C221%2C222%2C223%2C30%2C73%2C33%2C41%2C113%2C237%2C238%2C239%2C243%2C5%2C74%2C78%2C56%2C206%2C31%2C235%2C17%2C34%2C36%2C38&referer_site=true"
|
|
21
|
+
URL_SEARCH_MARKET2 = "#{URL_BASE}/market/search?d=#{CGI.escape(SITEIDS.join(","))}&referer_site=true"
|
|
22
|
+
#URL_SEARCH_VEHICLE = "#{URL_BASE}/ajax/search/vehicle_names?vehicleType=car&makerId=8&siteId=25%2C12%2C14%2C13%2C15%2C11%2C10%2C65%2C231%2C2%2C4%2C1%2C20%2C22%2C23%2C21%2C163%2C28%2C253%2C62%2C63%2C68%2C71%2C80%2C51%2C58%2C117%2C93%2C250%2C142%2C148%2C139%2C146%2C147%2C192%2C193%2C211%2C216%2C219%2C197%2C198%2C217%2C212%2C3%2C89%2C165%2C40%2C91%2C43%2C114%2C111%2C252%2C224%2C7%2C90%2C59%2C57%2C84%2C52%2C81%2C140%2C194%2C18%2C19%2C16%2C143%2C39%2C42%2C6%2C29%2C35%2C37%2C70%2C79%2C54%2C61%2C60%2C67%2C87%2C95%2C88%2C119%2C251%2C196%2C195%2C240%2C241%2C242%2C86%2C55%2C64%2C53%2C66%2C82%2C48%2C221%2C222%2C223%2C30%2C73%2C33%2C41%2C113%2C237%2C238%2C239%2C243%2C5%2C74%2C78%2C56%2C206%2C31%2C235%2C17%2C34%2C36%2C38&mode=market&_=1524808327596"
|
|
23
|
+
URL_SEARCH_VEHICLE = "#{URL_BASE}/ajax/search/vehicle_names?vehicleType=car&makerId=#{ENV['MAKERID']}&siteId=#{CGI.escape(SITEIDS.join(","))}&mode=market&_=1524808327596"
|
|
14
24
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
URL_SEARCH_GRADES = 'https://www.iauc.co.jp/ajax/search/grades?makerVehicleId%5B%5D=8-%EF%BD%B4%EF%BE%99%EF%BE%8C&siteId=25%2C12%2C14%2C13%2C15%2C11%2C10%2C65%2C231%2C2%2C4%2C1%2C20%2C22%2C23%2C21%2C163%2C28%2C253%2C62%2C63%2C68%2C71%2C80%2C51%2C58%2C117%2C93%2C250%2C142%2C148%2C139%2C146%2C147%2C192%2C193%2C211%2C216%2C219%2C197%2C198%2C217%2C212%2C3%2C89%2C165%2C40%2C91%2C43%2C114%2C111%2C252%2C224%2C7%2C90%2C59%2C57%2C84%2C52%2C81%2C140%2C194%2C18%2C19%2C16%2C143%2C39%2C42%2C6%2C29%2C35%2C37%2C70%2C79%2C54%2C61%2C60%2C67%2C87%2C95%2C88%2C119%2C251%2C196%2C195%2C240%2C241%2C242%2C86%2C55%2C64%2C53%2C66%2C82%2C48%2C221%2C222%2C223%2C30%2C73%2C33%2C41%2C113%2C237%2C238%2C239%2C243%2C5%2C74%2C78%2C56%2C206%2C31%2C235%2C17%2C34%2C36%2C38&ep=%2FmarketPrice%2FcarNames%2Ftypes%2Fgrades&_=1524808327598'
|
|
20
|
-
URL_SEARCH_TYPES = 'https://www.iauc.co.jp/ajax/search/types?makerVehicleId%5B%5D=8-%EF%BD%B4%EF%BE%99%EF%BE%8C&siteId=25%2C12%2C14%2C13%2C15%2C11%2C10%2C65%2C231%2C2%2C4%2C1%2C20%2C22%2C23%2C21%2C163%2C28%2C253%2C62%2C63%2C68%2C71%2C80%2C51%2C58%2C117%2C93%2C250%2C142%2C148%2C139%2C146%2C147%2C192%2C193%2C211%2C216%2C219%2C197%2C198%2C217%2C212%2C3%2C89%2C165%2C40%2C91%2C43%2C114%2C111%2C252%2C224%2C7%2C90%2C59%2C57%2C84%2C52%2C81%2C140%2C194%2C18%2C19%2C16%2C143%2C39%2C42%2C6%2C29%2C35%2C37%2C70%2C79%2C54%2C61%2C60%2C67%2C87%2C95%2C88%2C119%2C251%2C196%2C195%2C240%2C241%2C242%2C86%2C55%2C64%2C53%2C66%2C82%2C48%2C221%2C222%2C223%2C30%2C73%2C33%2C41%2C113%2C237%2C238%2C239%2C243%2C5%2C74%2C78%2C56%2C206%2C31%2C235%2C17%2C34%2C36%2C38&ep=%2FmarketPrice%2FcarNames%2Ftypes&_=1524808327597'
|
|
25
|
+
MAKER_VEHICLEID = "#{ENV['MAKERID']}-#{ENV['MAKERTYPE'].encode(Encoding::UTF_8)}"
|
|
26
|
+
URL_GRADES_END_FIX = "ep=#{CGI.escape("/marketPrice/carNames/types/grades")}&_=1524808327598"
|
|
27
|
+
#URL_SEARCH_GRADES = "#{URL_BASE}/ajax/search/grades?makerVehicleId%5B%5D=8-%EF%BD%B4%EF%BE%99%EF%BE%8C&siteId=25%2C12%2C14%2C13%2C15%2C11%2C10%2C65%2C231%2C2%2C4%2C1%2C20%2C22%2C23%2C21%2C163%2C28%2C253%2C62%2C63%2C68%2C71%2C80%2C51%2C58%2C117%2C93%2C250%2C142%2C148%2C139%2C146%2C147%2C192%2C193%2C211%2C216%2C219%2C197%2C198%2C217%2C212%2C3%2C89%2C165%2C40%2C91%2C43%2C114%2C111%2C252%2C224%2C7%2C90%2C59%2C57%2C84%2C52%2C81%2C140%2C194%2C18%2C19%2C16%2C143%2C39%2C42%2C6%2C29%2C35%2C37%2C70%2C79%2C54%2C61%2C60%2C67%2C87%2C95%2C88%2C119%2C251%2C196%2C195%2C240%2C241%2C242%2C86%2C55%2C64%2C53%2C66%2C82%2C48%2C221%2C222%2C223%2C30%2C73%2C33%2C41%2C113%2C237%2C238%2C239%2C243%2C5%2C74%2C78%2C56%2C206%2C31%2C235%2C17%2C34%2C36%2C38&ep=%2FmarketPrice%2FcarNames%2Ftypes%2Fgrades&_=1524808327598"
|
|
28
|
+
URL_SEARCH_GRADES = "#{URL_BASE}/ajax/search/grades?makerVehicleId%5B%5D=#{CGI.escape(MAKER_VEHICLEID)}&siteId=#{CGI.escape(SITEIDS.join(","))}&#{URL_GRADES_END_FIX}"
|
|
21
29
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
30
|
+
URL_TYPES_END_FIX = "ep=#{CGI.escape("/marketPrice/carNames/types")}&_=1524808327597"
|
|
31
|
+
#URL_SEARCH_TYPES = "#{URL_BASE}/ajax/search/types?makerVehicleId%5B%5D=8-%EF%BD%B4%EF%BE%99%EF%BE%8C&siteId=25%2C12%2C14%2C13%2C15%2C11%2C10%2C65%2C231%2C2%2C4%2C1%2C20%2C22%2C23%2C21%2C163%2C28%2C253%2C62%2C63%2C68%2C71%2C80%2C51%2C58%2C117%2C93%2C250%2C142%2C148%2C139%2C146%2C147%2C192%2C193%2C211%2C216%2C219%2C197%2C198%2C217%2C212%2C3%2C89%2C165%2C40%2C91%2C43%2C114%2C111%2C252%2C224%2C7%2C90%2C59%2C57%2C84%2C52%2C81%2C140%2C194%2C18%2C19%2C16%2C143%2C39%2C42%2C6%2C29%2C35%2C37%2C70%2C79%2C54%2C61%2C60%2C67%2C87%2C95%2C88%2C119%2C251%2C196%2C195%2C240%2C241%2C242%2C86%2C55%2C64%2C53%2C66%2C82%2C48%2C221%2C222%2C223%2C30%2C73%2C33%2C41%2C113%2C237%2C238%2C239%2C243%2C5%2C74%2C78%2C56%2C206%2C31%2C235%2C17%2C34%2C36%2C38&ep=%2FmarketPrice%2FcarNames%2Ftypes&_=1524808327597"
|
|
32
|
+
URL_SEARCH_TYPES = "#{URL_BASE}/ajax/search/types?makerVehicleId%5B%5D=#{CGI.escape(MAKER_VEHICLEID)}&siteId=#{CGI.escape(SITEIDS.join(","))}&#{URL_TYPES_END_FIX}"
|
|
25
33
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
34
|
+
URL_CARLIST = "#{URL_BASE}/market/carlist"
|
|
35
|
+
URL_CARLIST_NARROW = "#{URL_BASE}/ajax/list/carlist_narrow"
|
|
36
|
+
URL_CARLIST_NEXTPAGE = "#{URL_BASE}/ajax/list/carlist?element=&sort_type=&limit=100&branch=&controller=marketvehicle&display_mode=&page_myListId=&page_requestId=&type=page&isHtml=true&preload=0&mode=table&referrer="
|
|
37
|
+
|
|
38
|
+
COOKIE_CARLIST = "iauc_lang=ja; _ga=GA1.3.1960749188.1524724940; _gid=GA1.3.162237504.1524724940; iauc_rf=0; iauc_limit_vehicle=15; iauc_limit_market_vehicle=100; iauc_search_period_vehicle=12; _gat=1;"
|
|
39
|
+
QUERY_TYPE = "#{ENV['MAKERID']}-#{CGI.escape("{" + ENV['MAKERTYPE'].encode(Encoding::UTF_8) + "}")}"
|
|
40
|
+
#QUERY_CARLIST = "boxMakerScroll=0&domesticMakerScroll=106&foreignMakerScroll=200&boxTypeScroll=0&boxModelScroll=0&boxGradeScroll=0&attrColorScroll=0&makerAccordionButtonOpen=0&nameAccordionButtonOpen=0&maker%5B%5D=8&type%5B%5D=8-%7B%EF%BD%B4%EF%BE%99%EF%BE%8C%7D&searchPeriod=12&modelOfYearFrom=&modelOfYearTo=&mileageFrom=&mileageTo=&rateFrom=&rateTo=&vehicleInspection=&shiftType=&search_mode=maker"
|
|
41
|
+
QUERY_CARLIST = "boxMakerScroll=0&domesticMakerScroll=106&foreignMakerScroll=200&boxTypeScroll=0&boxModelScroll=0&boxGradeScroll=0&attrColorScroll=0&makerAccordionButtonOpen=0&nameAccordionButtonOpen=0&maker%5B%5D=#{ENV['MAKERID']}&type%5B%5D=#{QUERY_TYPE}&searchPeriod=12&modelOfYearFrom=&modelOfYearTo=&mileageFrom=&mileageTo=&rateFrom=&rateTo=&vehicleInspection=&shiftType=&search_mode=maker"
|
data/lib/weget/crawler.rb
CHANGED
|
@@ -6,163 +6,165 @@ require './lib.rb'
|
|
|
6
6
|
require './config.rb'
|
|
7
7
|
|
|
8
8
|
ENV['SSL_CERT_FILE'] = File.join(File.dirname($0), 'cert.pem')
|
|
9
|
-
exit if Object.const_defined?(:Ocra)
|
|
10
|
-
#1 セットVist
|
|
11
|
-
puts "1 セットVist #{URL_SERVICE}"
|
|
12
|
-
res = get(URL_SERVICE, nil)
|
|
13
|
-
setCookie = res.header["set-cookie"].to_s
|
|
14
|
-
cookies = setCookie.split(";")
|
|
15
|
-
stmp = find_cookie(cookies, " path=/, _l_tm_stmp=")
|
|
16
|
-
fuelrid = find_cookie(cookies, " path=/, fuelrid=")
|
|
17
|
-
awselb = find_cookie(cookies, " path=/, AWSELB=")
|
|
18
|
-
|
|
19
|
-
cookie = "iauc_lang=ja; _l_tm_stmp=#{stmp}; fuelrid=#{fuelrid}; AWSELB=#{awselb}"
|
|
20
|
-
puts "1 セットVist cookie => #{cookie}"
|
|
21
|
-
puts "======================================"
|
|
22
|
-
|
|
23
|
-
#2 新規TID
|
|
24
|
-
# puts "2 新規TID #{URL_TAB_CREATE}"
|
|
25
|
-
# tab_id = ""
|
|
26
|
-
# res = post(URL_TAB_CREATE, cookie, "old_tab_id=#{tab_id}&has_focus=true", nil)
|
|
27
|
-
# fileName = "./out/tid.html"
|
|
28
|
-
# File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
29
|
-
# exit
|
|
30
|
-
# xml = Nokogiri::XML(res.body)
|
|
31
|
-
# tab_id = xml.xpath("//tab_id")[0].text
|
|
32
|
-
xml = Nokogiri::HTML(res.body)
|
|
33
|
-
tab_id = xml.xpath('//*[@id="slider"]/div[5]/a')[0]["href"]
|
|
34
|
-
tab_id = tab_id.split('?__tid=')[1]
|
|
35
|
-
puts "2 新規TID tid => #{tab_id}"
|
|
36
|
-
puts "======================================"
|
|
37
|
-
|
|
38
|
-
#3 仮ログイン
|
|
39
|
-
puts "3 仮ログイン #{URL_NOTICE_PRE_LOGIN}"
|
|
40
|
-
data = "id=#{USER_ID}&password=#{USER_PASS}&referer=normal"
|
|
41
|
-
res = post(URL_NOTICE_PRE_LOGIN, cookie, data, nil)
|
|
42
|
-
xml = Nokogiri::XML(res.body)
|
|
43
|
-
item = xml.xpath("//item")[0].text
|
|
44
|
-
puts "3 仮ログイン item => #{item}"
|
|
45
|
-
puts "======================================"
|
|
46
|
-
|
|
47
|
-
#4 お知らせ
|
|
48
|
-
puts "4 お知らせ1 #{URL_REQUIRED_NOTICE}"
|
|
49
|
-
res = get("#{URL_REQUIRED_NOTICE}?id=#{item}&_=15247339838444", cookie)
|
|
50
|
-
puts "======================================"
|
|
51
|
-
|
|
52
|
-
#5 お知らせ
|
|
53
|
-
puts "5 お知らせ2 #{URL_REQUIRED_NOTICE_TOP}"
|
|
54
|
-
res = get("#{URL_REQUIRED_NOTICE_TOP}?id=#{item}&_=1524733983845", cookie)
|
|
55
|
-
puts "======================================"
|
|
56
|
-
|
|
57
|
-
#6 ログイン
|
|
58
|
-
puts "6 ログイン #{URL_LOGIN}"
|
|
59
|
-
data = "id=#{USER_ID}&password=#{USER_PASS}&__tid=#{tab_id}"
|
|
60
|
-
res = post(URL_LOGIN, cookie, data, "#{URL_LOGIN}?__tid=#{tab_id}")
|
|
61
|
-
puts "======================================"
|
|
62
|
-
|
|
63
|
-
begin
|
|
64
|
-
#相場検索⇒全選択⇒次へ=>イスズ⇒エルフ⇒次へ=>次ページ .... =>lastページ
|
|
65
|
-
now = Time.now.to_i.to_s
|
|
66
|
-
#7.1 ページVist 相場会場選択
|
|
67
|
-
puts "7.1 ページVist 相場会場選択 #{URL_MARKET}"
|
|
68
|
-
url = "#{URL_MARKET}?sess=clear&sess_m=clear?__tid=#{tab_id}"
|
|
69
|
-
res = post(url, cookie, "", "#{URL_LOGIN}?__tid=#{tab_id}")
|
|
70
|
-
fileName = "./out/#{now}.html"
|
|
71
|
-
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
72
|
-
puts "7.1 ページVist 相場会場選択 output => #{fileName}"
|
|
73
|
-
puts "======================================"
|
|
74
9
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
10
|
+
def crawl(maker, type)
|
|
11
|
+
#1 セットVist
|
|
12
|
+
puts "1 セットVist #{URL_SERVICE}"
|
|
13
|
+
res = get(URL_SERVICE, nil)
|
|
14
|
+
setCookie = res.header["set-cookie"].to_s
|
|
15
|
+
cookies = setCookie.split(";")
|
|
16
|
+
stmp = find_cookie(cookies, " path=/, _l_tm_stmp=")
|
|
17
|
+
fuelrid = find_cookie(cookies, " path=/, fuelrid=")
|
|
18
|
+
awselb = find_cookie(cookies, " path=/, AWSELB=")
|
|
19
|
+
|
|
20
|
+
cookie = "iauc_lang=ja; _l_tm_stmp=#{stmp}; fuelrid=#{fuelrid}; AWSELB=#{awselb}"
|
|
21
|
+
puts "1 セットVist cookie => #{cookie}"
|
|
81
22
|
puts "======================================"
|
|
82
23
|
|
|
83
|
-
#
|
|
84
|
-
puts "
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
24
|
+
#2 新規TID
|
|
25
|
+
# puts "2 新規TID #{URL_TAB_CREATE}"
|
|
26
|
+
# tab_id = ""
|
|
27
|
+
# res = post(URL_TAB_CREATE, cookie, "old_tab_id=#{tab_id}&has_focus=true", nil)
|
|
28
|
+
# fileName = "./out/tid.html"
|
|
29
|
+
# File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
30
|
+
# exit
|
|
31
|
+
# xml = Nokogiri::XML(res.body)
|
|
32
|
+
# tab_id = xml.xpath("//tab_id")[0].text
|
|
33
|
+
xml = Nokogiri::HTML(res.body)
|
|
34
|
+
tab_id = xml.xpath('//*[@id="slider"]/div[5]/a')[0]["href"]
|
|
35
|
+
tab_id = tab_id.split('?__tid=')[1]
|
|
36
|
+
puts "2 新規TID tid => #{tab_id}"
|
|
89
37
|
puts "======================================"
|
|
90
38
|
|
|
91
|
-
#
|
|
92
|
-
puts "
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
39
|
+
#3 仮ログイン
|
|
40
|
+
puts "3 仮ログイン #{URL_NOTICE_PRE_LOGIN}"
|
|
41
|
+
data = "id=#{USER_ID}&password=#{USER_PASS}&referer=normal"
|
|
42
|
+
res = post(URL_NOTICE_PRE_LOGIN, cookie, data, nil)
|
|
43
|
+
xml = Nokogiri::XML(res.body)
|
|
44
|
+
item = xml.xpath("//item")[0].text
|
|
45
|
+
puts "3 仮ログイン item => #{item}"
|
|
97
46
|
puts "======================================"
|
|
98
47
|
|
|
99
|
-
#
|
|
100
|
-
puts "
|
|
101
|
-
res = get("#{
|
|
102
|
-
fileName = "./out/#{now}_grade.json"
|
|
103
|
-
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
104
|
-
puts "7.5 グレード検索 output => #{fileName}"
|
|
48
|
+
#4 お知らせ
|
|
49
|
+
puts "4 お知らせ1 #{URL_REQUIRED_NOTICE}"
|
|
50
|
+
res = get("#{URL_REQUIRED_NOTICE}?id=#{item}&_=15247339838444", cookie)
|
|
105
51
|
puts "======================================"
|
|
106
52
|
|
|
107
|
-
#
|
|
108
|
-
puts "
|
|
109
|
-
res = get("#{
|
|
110
|
-
fileName = "./out/#{now}_type.json"
|
|
111
|
-
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
112
|
-
puts "7.6 タイプ検索 output => #{fileName}"
|
|
53
|
+
#5 お知らせ
|
|
54
|
+
puts "5 お知らせ2 #{URL_REQUIRED_NOTICE_TOP}"
|
|
55
|
+
res = get("#{URL_REQUIRED_NOTICE_TOP}?id=#{item}&_=1524733983845", cookie)
|
|
113
56
|
puts "======================================"
|
|
114
57
|
|
|
115
|
-
#
|
|
116
|
-
puts "
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
res = post(URL_CARLIST, cookie, data, "#{URL_SEARCH_MARKET2}&__tid=#{tab_id}")
|
|
120
|
-
fileName = "./out/#{now}_carlist.html"
|
|
121
|
-
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
122
|
-
puts "7.8 ページcarlist イスズ エルフ page=1 output => #{fileName}"
|
|
58
|
+
#6 ログイン
|
|
59
|
+
puts "6 ログイン #{URL_LOGIN}"
|
|
60
|
+
data = "id=#{USER_ID}&password=#{USER_PASS}&__tid=#{tab_id}"
|
|
61
|
+
res = post(URL_LOGIN, cookie, data, "#{URL_LOGIN}?__tid=#{tab_id}")
|
|
123
62
|
puts "======================================"
|
|
124
63
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
car_data = JSON.parse(res.body)
|
|
137
|
-
fileName = "./out/#{now}_carlist_page#{page}.html"
|
|
138
|
-
File.open(fileName, 'w') { |file| file.write(car_data["res_html"]) }
|
|
139
|
-
puts "7.9 ページcarlist イスズ エルフ count:#{car_data['count']} page=#{page} output => #{fileName}"
|
|
140
|
-
puts "======================================"
|
|
64
|
+
begin
|
|
65
|
+
#相場検索⇒全選択⇒次へ=>イスズ⇒エルフ⇒次へ=>次ページ .... =>lastページ
|
|
66
|
+
now = Time.now.to_i.to_s
|
|
67
|
+
#7.1 ページVist 相場会場選択
|
|
68
|
+
puts "7.1 ページVist 相場会場選択 #{URL_MARKET}"
|
|
69
|
+
url = "#{URL_MARKET}?sess=clear&sess_m=clear?__tid=#{tab_id}"
|
|
70
|
+
res = post(url, cookie, "", "#{URL_LOGIN}?__tid=#{tab_id}")
|
|
71
|
+
fileName = "./out/#{now}.html"
|
|
72
|
+
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
73
|
+
puts "7.1 ページVist 相場会場選択 output => #{fileName}"
|
|
74
|
+
puts "======================================"
|
|
141
75
|
|
|
142
|
-
|
|
76
|
+
#7.2 相場会場検索
|
|
77
|
+
puts "7.2 相場会場検索 #{URL_SEARCH_MARKET}"
|
|
78
|
+
res = get("#{URL_SEARCH_MARKET}&__tid=#{tab_id}", cookie)
|
|
79
|
+
fileName = "./out/#{now}_maker.json"
|
|
80
|
+
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
81
|
+
puts "7.2 相場会場検索 output => #{fileName}"
|
|
82
|
+
puts "======================================"
|
|
143
83
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
84
|
+
#7.3 相場会場検索2
|
|
85
|
+
puts "7.3 相場会場検索2 #{URL_SEARCH_MARKET2}"
|
|
86
|
+
res = get("#{URL_SEARCH_MARKET2}&__tid=#{tab_id}", cookie)
|
|
87
|
+
fileName = "./out/#{now}_maker.html"
|
|
88
|
+
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
89
|
+
puts "7.3 相場会場検索2 output => #{fileName}"
|
|
90
|
+
puts "======================================"
|
|
91
|
+
|
|
92
|
+
#7.4 車両検索
|
|
93
|
+
puts "7.4 車両検索 #{URL_SEARCH_VEHICLE}"
|
|
94
|
+
res = get("#{URL_SEARCH_VEHICLE}&__tid=#{tab_id}", cookie)
|
|
95
|
+
fileName = "./out/#{now}_vehicle.json"
|
|
96
|
+
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
97
|
+
puts "7.4 車両検索 output => #{fileName}"
|
|
98
|
+
puts "======================================"
|
|
99
|
+
|
|
100
|
+
#7.5 グレード検索
|
|
101
|
+
puts "7.5 グレード検索 #{URL_SEARCH_GRADES}"
|
|
102
|
+
res = get("#{URL_SEARCH_GRADES}&__tid=#{tab_id}", cookie)
|
|
103
|
+
fileName = "./out/#{now}_grade.json"
|
|
104
|
+
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
105
|
+
puts "7.5 グレード検索 output => #{fileName}"
|
|
106
|
+
puts "======================================"
|
|
107
|
+
|
|
108
|
+
#7.6 タイプ検索
|
|
109
|
+
puts "7.6 タイプ検索 #{URL_SEARCH_TYPES}"
|
|
110
|
+
res = get("#{URL_SEARCH_TYPES}&__tid=#{tab_id}", cookie)
|
|
111
|
+
fileName = "./out/#{now}_type.json"
|
|
112
|
+
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
113
|
+
puts "7.6 タイプ検索 output => #{fileName}"
|
|
114
|
+
puts "======================================"
|
|
115
|
+
|
|
116
|
+
#7.8 ページcarlist page=1
|
|
117
|
+
puts "7.8 ページcarlist #{maker}-#{type.encode(Encoding::UTF_8)} page=1 #{URL_CARLIST}"
|
|
118
|
+
cookie = "#{COOKIE_CARLIST} fuelrid=#{fuelrid}; AWSELB=#{awselb}; _l_tm_stmp=#{stmp}"
|
|
119
|
+
data = "__tid=#{tab_id}&#{QUERY_CARLIST}"
|
|
120
|
+
res = post(URL_CARLIST, cookie, data, "#{URL_SEARCH_MARKET2}&__tid=#{tab_id}")
|
|
121
|
+
fileName = "./out/#{now}_carlist_page1.html"
|
|
122
|
+
File.open(fileName, 'w') { |file| file.write(res.body) }
|
|
123
|
+
puts "7.8 ページcarlist #{maker}-#{type.encode(Encoding::UTF_8)} page=1 output => #{fileName}"
|
|
124
|
+
puts "======================================"
|
|
125
|
+
|
|
126
|
+
#7.9 ページcarlist page=2
|
|
127
|
+
page = 2
|
|
128
|
+
narrow_key = ""
|
|
147
129
|
data = "element=&narrow_data=&old_narrow_key=#{narrow_key}&__tid=#{tab_id}"
|
|
148
130
|
res = post(URL_CARLIST_NARROW, cookie, data, "#{URL_SEARCH_MARKET2}&__tid=#{tab_id}")
|
|
149
131
|
narrow_data = JSON.parse(res.body)
|
|
150
132
|
narrow_key = narrow_data["narrow_key"]
|
|
151
|
-
puts "7.
|
|
152
|
-
puts "7.
|
|
133
|
+
puts "7.9 ページcarlist narrow_key => #{narrow_key}"
|
|
134
|
+
puts "7.9 ページcarlist #{maker}-#{type.encode(Encoding::UTF_8)} page=#{page} #{URL_CARLIST_NEXTPAGE}"
|
|
153
135
|
url = "#{URL_CARLIST_NEXTPAGE}&page=#{page}&narrow_key=#{narrow_key}&__tid=#{tab_id}&_=#{Time.now.to_i.to_s}"
|
|
154
136
|
res = get(url, cookie)
|
|
155
137
|
car_data = JSON.parse(res.body)
|
|
156
138
|
fileName = "./out/#{now}_carlist_page#{page}.html"
|
|
157
139
|
File.open(fileName, 'w') { |file| file.write(car_data["res_html"]) }
|
|
158
|
-
puts "7.
|
|
140
|
+
puts "7.9 ページcarlist #{maker}-#{type.encode(Encoding::UTF_8)} count:#{car_data['count']} page=#{page} output => #{fileName}"
|
|
141
|
+
puts "======================================"
|
|
142
|
+
|
|
143
|
+
pageCnt = (car_data['count'].to_i / 100.00).ceil
|
|
144
|
+
|
|
145
|
+
(3..pageCnt).each do |p|
|
|
146
|
+
#7.9 ページcarlist イスズ エルフ page=3
|
|
147
|
+
page = p
|
|
148
|
+
data = "element=&narrow_data=&old_narrow_key=#{narrow_key}&__tid=#{tab_id}"
|
|
149
|
+
res = post(URL_CARLIST_NARROW, cookie, data, "#{URL_SEARCH_MARKET2}&__tid=#{tab_id}")
|
|
150
|
+
narrow_data = JSON.parse(res.body)
|
|
151
|
+
narrow_key = narrow_data["narrow_key"]
|
|
152
|
+
puts "7.10 ページcarlist narrow_key => #{narrow_key}"
|
|
153
|
+
puts "7.10 ページcarlist #{maker}-#{type.encode(Encoding::UTF_8)} page=#{page} #{URL_CARLIST_NEXTPAGE}"
|
|
154
|
+
url = "#{URL_CARLIST_NEXTPAGE}&page=#{page}&narrow_key=#{narrow_key}&__tid=#{tab_id}&_=#{Time.now.to_i.to_s}"
|
|
155
|
+
res = get(url, cookie)
|
|
156
|
+
car_data = JSON.parse(res.body)
|
|
157
|
+
fileName = "./out/#{now}_carlist_page#{page}.html"
|
|
158
|
+
File.open(fileName, 'w') { |file| file.write(car_data["res_html"]) }
|
|
159
|
+
puts "7.10 ページcarlist #{maker}-#{type.encode(Encoding::UTF_8)} count:#{car_data['count']} page=#{page} output => #{fileName}"
|
|
160
|
+
puts "======================================"
|
|
161
|
+
end
|
|
162
|
+
rescue => error
|
|
163
|
+
puts error.message
|
|
164
|
+
ensure
|
|
165
|
+
#10 ログアウト
|
|
166
|
+
puts "99 ログアウト #{URL_LOGOUT}"
|
|
167
|
+
res = get("#{URL_LOGOUT}?__tid=#{tab_id}", cookie)
|
|
159
168
|
puts "======================================"
|
|
160
169
|
end
|
|
161
|
-
|
|
162
|
-
puts error.message
|
|
163
|
-
ensure
|
|
164
|
-
#10 ログアウト
|
|
165
|
-
puts "99 ログアウト #{URL_LOGOUT}"
|
|
166
|
-
res = get("#{URL_LOGOUT}?__tid=#{tab_id}", cookie)
|
|
167
|
-
puts "======================================"
|
|
168
|
-
end
|
|
170
|
+
end
|
data/lib/weget/list.txt
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
1,トヨタ,ダイナ,0,ダイナ
|
|
2
|
+
2,トヨタ,トヨエース,0,トヨエース
|
|
3
|
+
3,日産,アトラス,1,アトラス
|
|
4
|
+
4,日産,UD,1,UD
|
|
5
|
+
5,日産,ビックサム,1,ビックサム
|
|
6
|
+
6,日産,コンドル,1,コンドル
|
|
7
|
+
7,日産,クオン,1,クオン
|
|
8
|
+
8,三菱,エアロエース,2,エアロエース
|
|
9
|
+
9,三菱,エアロクイーシ,2,エアロクイーシ
|
|
10
|
+
10,三菱,エアロスター,2,エアロスター
|
|
11
|
+
11,三菱,エアロシディ,2,エアロシディ
|
|
12
|
+
12,三菱,キャンター,2,キャンター
|
|
13
|
+
13,三菱,キャンターガッツ,2,キャンターガッツ
|
|
14
|
+
14,三菱,スーパーグレート,2,スーパーグレート
|
|
15
|
+
15,三菱,フソウトラック,2,フソウトラック
|
|
16
|
+
16,三菱,フソウトラックファイター,2,フソウトラックファイター
|
|
17
|
+
17,三菱,フソウバス,2,フソウバス
|
|
18
|
+
18,三菱,ローザ,2,ローザ
|
|
19
|
+
19,マツダ,タイタン,3,タイタン
|
|
20
|
+
20,イスズ,イスズ,4,イスズ
|
|
21
|
+
21,イスズ,イスズトラック,4,イスズトラック
|
|
22
|
+
22,イスズ,イスズバス,4,イスズバス
|
|
23
|
+
23,イスズ,エルフ,4,エルフ
|
|
24
|
+
24,イスズ,エルフ100,4,エルフ100
|
|
25
|
+
25,イスズ,エルフ150,4,エルフ150
|
|
26
|
+
26,イスズ,エルフ250,4,エルフ250
|
|
27
|
+
27,イスズ,エルフ350,4,エルフ350
|
|
28
|
+
28,イスズ,エルフUT,4,エルフUT
|
|
29
|
+
29,イスズ,ガーラ,4,ガーラ
|
|
30
|
+
30,イスズ,ガーラミオ,4,ガーラミオ
|
|
31
|
+
31,イスズ,ギガ,4,ギガ
|
|
32
|
+
32,イスズ,コモ,4,コモ
|
|
33
|
+
33,イスズ,ジャーニー,4,ジャーニー
|
|
34
|
+
34,イスズ,ジャストン,4,ジャストン
|
|
35
|
+
35,イスズ,フォワード,4,フォワード
|
|
36
|
+
36,日野,ヒノ,5,ヒノ
|
|
37
|
+
37,日野,ヒノFD,5,ヒノFD
|
|
38
|
+
38,日野,ヒノデュオロ,5,ヒノデュオロ
|
|
39
|
+
39,日野,ヒノトラック,5,ヒノトラック
|
|
40
|
+
40,日野,ヒノバス,5,ヒノバス
|
|
41
|
+
41,日野,ヒノリエッセ,5,ヒノリエッセ
|
|
42
|
+
42,日野,ヒノレインボー,5,ヒノレインボー
|
|
43
|
+
43,日野,ヒノレンジャー,5,ヒノレンジャー
|
|
44
|
+
44,日野,ブルーリボン,5,ブルーリボン
|
|
45
|
+
45,日野,ポンチョ,5,ポンチョ
|
|
46
|
+
46,日野,メルファ,5,メルファ
|
data/lib/weget/test.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
#require 'cgi'
|
|
3
|
+
#puts CGI.escape(url)
|
|
4
|
+
File.open('./list.txt', "r:UTF-8") do |file|
|
|
5
|
+
file.each_line do |line|
|
|
6
|
+
arr = line.split(",")
|
|
7
|
+
ENV['MAKERID'] = arr[3]
|
|
8
|
+
ENV['MAKERTYPE'] = arr[4]
|
|
9
|
+
require './crawler.rb'
|
|
10
|
+
require './analysis.rb'
|
|
11
|
+
crawl(arr[1], arr[2])
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
analysis()
|
data/lib/weget/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: weget
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- shouaya
|
|
@@ -57,6 +57,8 @@ files:
|
|
|
57
57
|
- lib/weget/config.rb
|
|
58
58
|
- lib/weget/crawler.rb
|
|
59
59
|
- lib/weget/lib.rb
|
|
60
|
+
- lib/weget/list.txt
|
|
61
|
+
- lib/weget/test.rb
|
|
60
62
|
- lib/weget/version.rb
|
|
61
63
|
- weget.gemspec
|
|
62
64
|
homepage: https://shouaya.github.io
|