instagram-tag-search 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/instagram-tag-search.rb +163 -0
  3. metadata +64 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a588894582381be8e200cd8cb41b6fcf6f02f3f9
4
+ data.tar.gz: 1d9855124ea0c24f5d43c58b295938f023e0f7b1
5
+ SHA512:
6
+ metadata.gz: df48f21760dcdccf2e6b995e33655f6b8b2b2e300ec3ac431cfd76297f3f53a666913415610a8f97450f8d86603b132cdeccd40a36bc5fab318efa8598dc654b
7
+ data.tar.gz: 1078cf8db43bfb851c83461477c225f47921c58df713ab2730969fc0ade5d18d744b101acd0acdc72abda05de24f279453d0cb928fde4120f059fe7c1aa3971b
@@ -0,0 +1,163 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+ require 'json'
4
+ require 'net/http'
5
+ require 'net/https'
6
+ require 'uri'
7
+ require 'pp'
8
+ require 'openssl'
9
+ require 'CSV'
10
+
11
+ class InstagramData
12
+ attr_reader :tag_name, :get_number, :instagram_data
13
+ TAG_URL_PREFIX = 'https://www.instagram.com/explore/tags/'
14
+
15
+ def initialize(tag_name: '岸和田', get_number: 10)
16
+ @tag_name = tag_name
17
+ @get_number = get_number
18
+ @got_number = 0
19
+ end
20
+
21
+ def getNextPageData(endCursor, csrfToken, rhx_gis, gotNumber)
22
+ uri = URI.parse(TAG_URL_PREFIX + URI.encode_www_form_component(@tag_name) + "/?__a=1&max_id=" + endCursor)
23
+ http = Net::HTTP.new(uri.host, uri.port)
24
+
25
+ http.use_ssl = true
26
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
27
+
28
+ req = Net::HTTP::Get.new(uri.request_uri)
29
+
30
+ res = http.request(req)
31
+ endCursor = JSON.parse(res.body)['graphql']['hashtag']['edge_hashtag_to_media']['page_info']['end_cursor']
32
+ dataArray = JSON.parse(res.body)['graphql']['hashtag']['edge_hashtag_to_media']['edges']
33
+
34
+ return dataArray, endCursor
35
+ end
36
+
37
+ def parseInstagramData(dataArray)
38
+ dataArray.length.times do |i|
39
+ insta_data = {userId: '', timestamp: '', pageUrl: '', likeCount: '', commentCount: '', caption: ''}
40
+ # ユーザIDの取得
41
+ insta_data[:userId] = dataArray[i]['node']['owner']['id']
42
+ # UNIXタイムからの変換
43
+ insta_data[:timestamp] = Time.at(dataArray[i]['node']['taken_at_timestamp'])
44
+ # 画像URLの取得
45
+ # writeData.push(dataArray[i]["thumbnail"])
46
+ # ページURLの取得
47
+ insta_data[:pageUrl] = "https://www.instagram.com/p/" + dataArray[i]['node']['shortcode'] + "/"
48
+ # いいねの数とコメントの数
49
+ insta_data[:likeCount] = dataArray[i]['node']['edge_liked_by']['count']
50
+ insta_data[:commentCount] = dataArray[i]['node']['edge_media_to_comment']['count']
51
+
52
+ # 投稿者コメントの取得
53
+ insta_data[:caption] = (dataArray[i]['node']['edge_media_to_caption']['edges'][0]['node']['text'])
54
+
55
+ # 投稿者コメントからタグのみ抽出
56
+ tags = (dataArray[i]['node']['edge_media_to_caption']['edges'][0]['node']['text'] + " ").scan(/[#][A-Za-zA-Za-z一-鿆0-90-9ぁ-ヶヲ-゚ー○]+/).join(" ")
57
+ insta_data[:tags] = tags
58
+
59
+ @instagram_data.push(insta_data)
60
+ @got_number += 1
61
+ # 予定取得枚数に到達したら終了
62
+ break if @got_number >= @get_number
63
+ end
64
+ end
65
+
66
+ def getInstagramData
67
+ tag_search_url = TAG_URL_PREFIX + URI.encode_www_form_component(@tag_name)
68
+ @instagram_data = []
69
+
70
+ # 文字コード
71
+ charset = nil
72
+
73
+ puts "#{@tag_name} のデータを #{@get_number} 件分取得します"
74
+
75
+ # タグ検索ページへアクセス
76
+ # 文字コードを取得しながら、アクセス
77
+ html = open(tag_search_url) do |f|
78
+ charset = f.charset
79
+ f.read
80
+ end
81
+
82
+ # 以下、Nokogiriによるアクセスに必要な情報の取得処理
83
+ # 全部のHTMLを取得
84
+ allDoc = Nokogiri::HTML.parse(html, nil, charset)
85
+ # メタ情報だけ取得
86
+ metaInfo = allDoc.css('body script').first.text
87
+ # 前後に不要な情報があるのでカット
88
+ metaInfo.slice!(0, 21)
89
+ metaInfo = metaInfo.chop
90
+
91
+ # 解析用JSONの保存
92
+ response_json = JSON.parse(metaInfo)
93
+
94
+ # データの中身を取得
95
+ dataArray = response_json['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges'];
96
+
97
+ # 初期ページの分を取得
98
+ parseInstagramData(dataArray)
99
+
100
+ # 取得した件数を記録
101
+ puts "#{@got_number} 件取得しました"
102
+
103
+ # 取得枚数に足りていない場合
104
+ while @got_number < @get_number do
105
+ # 次のページの取得に必要な情報を取得
106
+ # csrfトークンの取得
107
+ csrfToken = response_json['config']['csrf_token']
108
+ # rhx_gisの取得
109
+ rhx_gis = response_json['rhx_gis']
110
+ # 次のページ取得用のカーソル
111
+ @endCursor = response_json['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['page_info']['end_cursor']
112
+
113
+ puts "5秒待ってから再開します"
114
+ sleep 5
115
+
116
+ dataArray, @endCursor = getNextPageData(@endCursor, csrfToken, rhx_gis, @got_number)
117
+
118
+ parseInstagramData(dataArray)
119
+
120
+ # 取得した件数を記録
121
+ puts "#{@got_number} 件取得しました"
122
+ end
123
+
124
+ end
125
+
126
+ # CSVファイルのヘッダを記入
127
+ def csvHeaderWrite(csvfilename)
128
+ CSV.open(csvfilename, "ab+") do |csv|
129
+ writeData = Array.new
130
+ writeData.push("ユーザID")
131
+ writeData.push("投稿日時(日本時間)")
132
+ writeData.push("ページURL")
133
+ writeData.push("いいねの数")
134
+ writeData.push("コメント数")
135
+ writeData.push("投稿者コメント")
136
+ writeData.push("ハッシュタグ")
137
+
138
+ csv << writeData
139
+ end
140
+ end
141
+
142
+ # CSVファイルへの書き込み
143
+ def csvDataWrite(dataArray, csvfilename)
144
+ dataArray.each do |n|
145
+ puts "n write n is #{n}"
146
+ CSV.open(csvfilename, "ab+") do |csv|
147
+ # データはハッシュなので配列にし、キーを除き、値を代入する
148
+ writeData = n.to_a.map{|e| e[1]}
149
+
150
+ csv << writeData
151
+ end
152
+ end
153
+ end
154
+
155
+ # CSVファイルへの書き込み
156
+ def writeToCSV(dataArray, csvfilename: "getInstagramData_#{Time.now.strftime("%Y%m%d%H%M%S")}.csv")
157
+ # csvファイルにヘッダを記入
158
+ csvHeaderWrite(csvfilename)
159
+ # csvファイルに保存
160
+ csvDataWrite(dataArray, csvfilename)
161
+ end
162
+
163
+ end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: instagram-tag-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Tom syamoji
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-07-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.5.6
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.5.6
33
+ description: This gem gets instagram data with tag search
34
+ email: uniquequeue@gmail.com
35
+ executables: []
36
+ extensions: []
37
+ extra_rdoc_files: []
38
+ files:
39
+ - lib/instagram-tag-search.rb
40
+ homepage: https://github.com/syamoji/instagram-tag-search
41
+ licenses:
42
+ - MIT
43
+ metadata: {}
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 2.5.2
61
+ signing_key:
62
+ specification_version: 4
63
+ summary: get instagram data with tag search
64
+ test_files: []