instagram-tag-search 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/instagram-tag-search.rb +163 -0
- metadata +64 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a588894582381be8e200cd8cb41b6fcf6f02f3f9
|
4
|
+
data.tar.gz: 1d9855124ea0c24f5d43c58b295938f023e0f7b1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: df48f21760dcdccf2e6b995e33655f6b8b2b2e300ec3ac431cfd76297f3f53a666913415610a8f97450f8d86603b132cdeccd40a36bc5fab318efa8598dc654b
|
7
|
+
data.tar.gz: 1078cf8db43bfb851c83461477c225f47921c58df713ab2730969fc0ade5d18d744b101acd0acdc72abda05de24f279453d0cb928fde4120f059fe7c1aa3971b
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'json'
|
4
|
+
require 'net/http'
|
5
|
+
require 'net/https'
|
6
|
+
require 'uri'
|
7
|
+
require 'pp'
|
8
|
+
require 'openssl'
|
9
|
+
require 'csv'
|
10
|
+
|
11
|
+
# Collects posts for one Instagram hashtag and exports them as CSV.
#
# Typical use:
#
#   ig = InstagramData.new(tag_name: 'ruby', get_number: 30)
#   ig.getInstagramData
#   ig.writeToCSV(ig.instagram_data)
#
# Each collected post is a Hash with the keys :userId, :timestamp, :pageUrl,
# :likeCount, :commentCount, :caption and :tags (in that order, which is also
# the CSV column order).
class InstagramData
  attr_reader :tag_name, :get_number, :instagram_data

  TAG_URL_PREFIX = 'https://www.instagram.com/explore/tags/'

  # Column titles written by #csvHeaderWrite, in the same order as the keys
  # of the hashes built by #parseInstagramData.
  CSV_HEADER = [
    "ユーザID",
    "投稿日時(日本時間)",
    "ページURL",
    "いいねの数",
    "コメント数",
    "投稿者コメント",
    "ハッシュタグ"
  ].freeze

  # Matches "#" followed by one or more of: ASCII letters/digits, fullwidth
  # letters/digits, CJK ideographs U+4E00-U+9FC6, hiragana/katakana
  # U+3041-U+30F6, halfwidth katakana U+FF66-U+FF9F, the prolonged sound mark
  # (U+30FC) and the circle sign (U+25CB).
  # Written with \u escapes so the ranges cannot be altered by re-encoding or
  # Unicode normalisation of this file.
  # NOTE(review): the fullwidth and halfwidth ranges are reconstructed from a
  # character class that contained duplicated ASCII ranges; confirm against
  # the upstream source.
  HASHTAG_PATTERN = /#[A-Za-z0-9\u{FF21}-\u{FF3A}\u{FF41}-\u{FF5A}\u{FF10}-\u{FF19}\u{4E00}-\u{9FC6}\u{3041}-\u{30F6}\u{FF66}-\u{FF9F}\u{30FC}\u{25CB}]+/.freeze

  # @param tag_name [String] hashtag to search for (without the leading "#")
  # @param get_number [Integer] maximum number of posts to collect
  def initialize(tag_name: '岸和田', get_number: 10)
    @tag_name = tag_name
    @get_number = get_number
    @got_number = 0
    # Start with an empty list so the reader and #parseInstagramData are
    # usable before #getInstagramData has been called.
    @instagram_data = []
  end

  # Fetches the page of results that follows +endCursor+.
  #
  # @param endCursor [String] cursor returned with the previous page
  # @param csrfToken [Object] accepted for interface compatibility; not used
  # @param rhx_gis [Object] accepted for interface compatibility; not used
  # @param gotNumber [Object] accepted for interface compatibility; not used
  # @return [Array] two elements: the array of post edges and the cursor for
  #   the following page
  def getNextPageData(endCursor, csrfToken, rhx_gis, gotNumber)
    uri = URI.parse(TAG_URL_PREFIX + URI.encode_www_form_component(@tag_name) + "/?__a=1&max_id=" + endCursor)

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    # Verify the server certificate; disabling verification would allow the
    # response to be tampered with in transit.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    res = http.request(Net::HTTP::Get.new(uri.request_uri))

    # Parse the body once and read both values from the same structure.
    media = JSON.parse(res.body)['graphql']['hashtag']['edge_hashtag_to_media']

    [media['edges'], media['page_info']['end_cursor']]
  end

  # Converts raw post edges into result hashes and appends them to
  # #instagram_data, stopping as soon as +get_number+ posts were collected.
  #
  # @param dataArray [Array<Hash>] post edges as found under
  #   'edge_hashtag_to_media' => 'edges'
  def parseInstagramData(dataArray)
    dataArray.each do |edge|
      # Stop once the requested number of posts has been reached.
      break if @got_number >= @get_number

      node = edge['node']

      # A post may have no caption at all; treat that as an empty text
      # instead of failing on the missing first edge.
      first_caption = node['edge_media_to_caption']['edges'].first
      caption = first_caption ? first_caption['node']['text'] : ''

      @instagram_data.push(
        userId: node['owner']['id'],
        # Converted from a UNIX timestamp.
        # NOTE(review): Time.at yields the process-local time zone, while the
        # CSV header labels this column as Japan time; confirm the intent.
        timestamp: Time.at(node['taken_at_timestamp']),
        pageUrl: "https://www.instagram.com/p/" + node['shortcode'] + "/",
        likeCount: node['edge_liked_by']['count'],
        commentCount: node['edge_media_to_comment']['count'],
        caption: caption,
        # Only the hashtags of the caption, separated by single spaces.
        tags: caption.scan(HASHTAG_PATTERN).join(" ")
      )
      @got_number += 1
    end
  end

  # Downloads posts for the configured tag until +get_number+ posts were
  # collected or no further page is available. Progress is printed to
  # standard output and the method waits 5 seconds before each extra page.
  #
  # @return [Array<Hash>] the collected posts (same object as #instagram_data)
  def getInstagramData
    # Reset both the result list and the counter so repeated calls start
    # from scratch.
    @instagram_data = []
    @got_number = 0

    puts "#{@tag_name} のデータを #{@get_number} 件分取得します"

    response_json = fetch_first_page_json
    media = response_json['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']

    # Posts contained in the initial page.
    parseInstagramData(media['edges'])
    puts "#{@got_number} 件取得しました"

    # Values handed to #getNextPageData; they do not change between pages.
    csrfToken = (response_json['config'] || {})['csrf_token']
    rhx_gis = response_json['rhx_gis']

    # The cursor is read from the first page once and afterwards replaced by
    # the cursor of every page fetched, so each request moves forward.
    @endCursor = media['page_info']['end_cursor']

    while @got_number < @get_number
      # No cursor means there is nothing left to fetch.
      break if @endCursor.nil? || @endCursor.empty?

      puts "5秒待ってから再開します"
      sleep 5

      dataArray, @endCursor = getNextPageData(@endCursor, csrfToken, rhx_gis, @got_number)

      # An empty page would never advance the counter; stop instead of
      # looping forever.
      break if dataArray.nil? || dataArray.empty?

      parseInstagramData(dataArray)
      puts "#{@got_number} 件取得しました"
    end

    @instagram_data
  end

  # Appends the header row to the CSV file.
  #
  # @param csvfilename [String] path of the CSV file (created if missing)
  def csvHeaderWrite(csvfilename)
    CSV.open(csvfilename, "ab+") do |csv|
      csv << CSV_HEADER
    end
  end

  # Appends one CSV row per collected post and echoes each post to standard
  # output.
  #
  # @param dataArray [Array<Hash>] posts as built by #parseInstagramData
  # @param csvfilename [String] path of the CSV file (created if missing)
  def csvDataWrite(dataArray, csvfilename)
    # Open the file once for all rows rather than once per row.
    CSV.open(csvfilename, "ab+") do |csv|
      dataArray.each do |n|
        puts "n write n is #{n}"
        # Each post is a hash; only its values go into the row.
        csv << n.values
      end
    end
  end

  # Writes the header row followed by all posts to a CSV file.
  #
  # @param dataArray [Array<Hash>] posts as built by #parseInstagramData
  # @param csvfilename [String] target path; defaults to a name containing
  #   the current timestamp
  def writeToCSV(dataArray, csvfilename: "getInstagramData_#{Time.now.strftime("%Y%m%d%H%M%S")}.csv")
    csvHeaderWrite(csvfilename)
    csvDataWrite(dataArray, csvfilename)
  end

  private

  # Downloads the tag search page and returns the JSON document embedded in
  # the first <script> element of its body.
  def fetch_first_page_json
    tag_search_url = TAG_URL_PREFIX + URI.encode_www_form_component(@tag_name)

    # Remember the charset reported for the response so the HTML parser can
    # decode the document with it.
    charset = nil
    html = URI.parse(tag_search_url).open do |f|
      charset = f.charset
      f.read
    end

    document = Nokogiri::HTML.parse(html, nil, charset)
    script = document.css('body script').first
    script_text = script ? script.text : ''

    # The JSON is wrapped in a JavaScript statement. Taking everything between
    # the outermost braces avoids depending on the exact length of that
    # wrapper (previously a fixed 21 leading characters and 1 trailing one).
    json_start = script_text.index('{')
    json_end = script_text.rindex('}')
    raise JSON::ParserError, 'embedded page data not found' unless json_start && json_end

    JSON.parse(script_text[json_start..json_end])
  end
end
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: instagram-tag-search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tom syamoji
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-07-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.5.6
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.5'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.5.6
|
33
|
+
description: This gem gets instagram data with tag search
|
34
|
+
email: uniquequeue@gmail.com
|
35
|
+
executables: []
|
36
|
+
extensions: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
files:
|
39
|
+
- lib/instagram-tag-search.rb
|
40
|
+
homepage: https://github.com/syamoji/instagram-tag-search
|
41
|
+
licenses:
|
42
|
+
- MIT
|
43
|
+
metadata: {}
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 2.5.2
|
61
|
+
signing_key:
|
62
|
+
specification_version: 4
|
63
|
+
summary: get instagram data with tag search
|
64
|
+
test_files: []
|