instagram-tag-search 0.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/instagram-tag-search.rb +163 -0
  3. metadata +64 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a588894582381be8e200cd8cb41b6fcf6f02f3f9
4
+ data.tar.gz: 1d9855124ea0c24f5d43c58b295938f023e0f7b1
5
+ SHA512:
6
+ metadata.gz: df48f21760dcdccf2e6b995e33655f6b8b2b2e300ec3ac431cfd76297f3f53a666913415610a8f97450f8d86603b132cdeccd40a36bc5fab318efa8598dc654b
7
+ data.tar.gz: 1078cf8db43bfb851c83461477c225f47921c58df713ab2730969fc0ade5d18d744b101acd0acdc72abda05de24f279453d0cb928fde4120f059fe7c1aa3971b
@@ -0,0 +1,163 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+ require 'json'
4
+ require 'net/http'
5
+ require 'net/https'
6
+ require 'uri'
7
+ require 'pp'
8
+ require 'openssl'
9
+ require 'csv'
10
+
11
# Collects public posts for one Instagram hashtag and exports them to CSV.
#
# Typical use:
#
#   scraper = InstagramData.new(tag_name: 'ruby', get_number: 30)
#   scraper.getInstagramData
#   scraper.writeToCSV(scraper.instagram_data)
#
# NOTE(review): everything here depends on the JSON layout that the tag page
# and its `?__a=1` variant returned when this was written (the key paths are
# spelled out in the methods below) — confirm the service still answers in
# that shape before relying on this class.
class InstagramData
  attr_reader :tag_name, :get_number, :instagram_data

  TAG_URL_PREFIX = 'https://www.instagram.com/explore/tags/'
  POST_URL_PREFIX = 'https://www.instagram.com/p/'

  # The CSV header labels the timestamp column as Japan time, so timestamps
  # are pinned to this offset instead of the host machine's local zone.
  JST_OFFSET = '+09:00'

  # Number of leading characters dropped from the first <script> in <body>
  # before it is parsed as JSON.
  # NOTE(review): presumably the length of a `window._sharedData = ` prefix —
  # verify against a live page.
  SHARED_DATA_PREFIX_LENGTH = 21

  # Pause between page requests, in seconds.
  PAGE_INTERVAL_SECONDS = 5

  # Matches one hashtag inside a caption.
  # NOTE(review): in the reviewed copy the second alphabetic/numeric ranges
  # were plain duplicates of A-Za-z / 0-9 and the last katakana range looked
  # normalized; presumably they were the full-width alphanumerics and the
  # half-width katakana. They are written as \u escapes here so they cannot
  # be altered by text normalization — confirm against the upstream source.
  HASHTAG_PATTERN = /[#][A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A0-9\uFF10-\uFF19一-鿆ぁ-ヶ\uFF66-\uFF9Fー○]+/

  # Column titles, in the same order as the keys of each record hash.
  CSV_HEADER = [
    "ユーザID",
    "投稿日時(日本時間)",
    "ページURL",
    "いいねの数",
    "コメント数",
    "投稿者コメント",
    "ハッシュタグ"
  ].freeze

  # @param tag_name [String] hashtag to search for, without the leading '#'
  # @param get_number [Integer] maximum number of posts to collect
  def initialize(tag_name: '岸和田', get_number: 10)
    @tag_name = tag_name
    @get_number = get_number
    @got_number = 0
    # Initialised here so #parseInstagramData and #instagram_data are usable
    # before #getInstagramData has been called.
    @instagram_data = []
  end

  # Fetches the page of results that follows +endCursor+.
  #
  # @param endCursor [String] pagination cursor returned with the previous page
  # @param csrfToken [Object] accepted for compatibility; not used by the request
  # @param rhx_gis [Object] accepted for compatibility; not used by the request
  # @param gotNumber [Object] accepted for compatibility; not used by the request
  # @return [Array(Array<Hash>, String)] the page's edges (empty when the
  #   response carries none) and the cursor for the following page (nil when
  #   the response carries none)
  # @raise [JSON::ParserError] when the response body is not JSON
  def getNextPageData(endCursor, csrfToken = nil, rhx_gis = nil, gotNumber = nil)
    uri = URI.parse("#{tag_page_url}/?__a=1&max_id=#{endCursor}")

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    # Verify the server certificate; skipping verification would let any
    # intermediary tamper with the response.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    response = http.request(Net::HTTP::Get.new(uri.request_uri))

    # Parse the body once and read both values from the same structure.
    media = JSON.parse(response.body).dig('graphql', 'hashtag', 'edge_hashtag_to_media') || {}

    [media['edges'] || [], media.dig('page_info', 'end_cursor')]
  end

  # Converts raw edges into record hashes and appends them to #instagram_data,
  # stopping as soon as +get_number+ records have been collected in total.
  #
  # @param dataArray [Array<Hash>] edges, each holding the post under 'node'
  # @return [void]
  def parseInstagramData(dataArray)
    dataArray.each do |edge|
      # Check the limit before adding, so a call made after the limit was
      # reached (or with a limit of zero) adds nothing.
      break if @got_number >= @get_number

      @instagram_data.push(build_record(edge['node']))
      @got_number += 1
    end
  end

  # Downloads the tag page and then follow-up pages until +get_number+ posts
  # were collected or no further page is available. Replaces any previously
  # collected data and prints progress messages to standard output.
  #
  # @return [Array<Hash>] the collected records (same object as #instagram_data)
  # @raise [RuntimeError] when the first page lacks the hashtag media data
  def getInstagramData
    @instagram_data = []
    @got_number = 0

    puts "#{@tag_name} のデータを #{@get_number} 件分取得します"

    first_page = fetch_first_page
    media = first_page.dig('entry_data', 'TagPage', 0, 'graphql', 'hashtag', 'edge_hashtag_to_media')
    raise "tag page for #{@tag_name} did not contain hashtag media data" if media.nil?

    # Records from the initial page.
    parseInstagramData(media['edges'] || [])
    puts "#{@got_number} 件取得しました"

    # Values read once from the first page; only the cursor changes afterwards.
    csrf_token = first_page.dig('config', 'csrf_token')
    rhx_gis = first_page['rhx_gis']
    @endCursor = media.dig('page_info', 'end_cursor')

    while @got_number < @get_number
      # Without a cursor there is no next page to ask for.
      break if @endCursor.to_s.empty?

      puts "5秒待ってから再開します"
      sleep PAGE_INTERVAL_SECONDS

      # Keep the cursor returned by each request so the next iteration
      # advances instead of requesting the same page again.
      edges, @endCursor = getNextPageData(@endCursor, csrf_token, rhx_gis, @got_number)
      # An empty page would never raise the count; stop instead of spinning.
      break if edges.empty?

      parseInstagramData(edges)
      puts "#{@got_number} 件取得しました"
    end

    @instagram_data
  end

  # Appends the header row to the CSV file.
  #
  # @param csvfilename [String] path of the CSV file (created when missing)
  def csvHeaderWrite(csvfilename)
    CSV.open(csvfilename, "ab+") { |csv| csv << CSV_HEADER }
  end

  # Appends one CSV row per record, using the record's values in key order.
  #
  # @param dataArray [Array<Hash>] records as produced by #parseInstagramData
  # @param csvfilename [String] path of the CSV file (created when missing)
  def csvDataWrite(dataArray, csvfilename)
    # Open the file once for all rows rather than once per row.
    CSV.open(csvfilename, "ab+") do |csv|
      dataArray.each do |record|
        # Trace line kept so console output matches earlier versions.
        puts "n write n is #{record}"
        csv << record.values
      end
    end
  end

  # Writes the header row followed by all records to a CSV file.
  #
  # @param dataArray [Array<Hash>] records as produced by #parseInstagramData
  # @param csvfilename [String] target path; defaults to a name carrying the
  #   current time
  def writeToCSV(dataArray, csvfilename: "getInstagramData_#{Time.now.strftime("%Y%m%d%H%M%S")}.csv")
    csvHeaderWrite(csvfilename)
    csvDataWrite(dataArray, csvfilename)
  end

  private

  # URL of the tag page for the configured hashtag.
  def tag_page_url
    TAG_URL_PREFIX + URI.encode_www_form_component(@tag_name)
  end

  # Downloads the tag page and returns the JSON embedded in the first
  # <script> element inside <body>.
  def fetch_first_page
    charset = nil
    # URI#open (from open-uri) instead of Kernel#open, which no longer
    # accepts URLs on current Ruby versions.
    html = URI.parse(tag_page_url).open do |page|
      charset = page.charset
      page.read
    end

    document = Nokogiri::HTML.parse(html, nil, charset)
    script_text = document.css('body script').first.text

    # Drop the leading characters and the final character around the JSON.
    payload = script_text[SHARED_DATA_PREFIX_LENGTH..-1].to_s.chop
    JSON.parse(payload)
  end

  # Builds the record hash for a single post node.
  def build_record(node)
    # Posts without a caption have no caption edges; treat them as empty text.
    caption = node.dig('edge_media_to_caption', 'edges', 0, 'node', 'text').to_s

    {
      userId: node['owner']['id'],
      # Unix time converted to a Time pinned to Japan time.
      timestamp: Time.at(node['taken_at_timestamp']).localtime(JST_OFFSET),
      pageUrl: "#{POST_URL_PREFIX}#{node['shortcode']}/",
      likeCount: node['edge_liked_by']['count'],
      commentCount: node['edge_media_to_comment']['count'],
      caption: caption,
      # Only the hashtags found in the caption, separated by single spaces.
      tags: caption.scan(HASHTAG_PATTERN).join(" ")
    }
  end
end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: instagram-tag-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Tom syamoji
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-07-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.5.6
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.5.6
33
+ description: This gem gets instagram data with tag search
34
+ email: uniquequeue@gmail.com
35
+ executables: []
36
+ extensions: []
37
+ extra_rdoc_files: []
38
+ files:
39
+ - lib/instagram-tag-search.rb
40
+ homepage: https://github.com/syamoji/instagram-tag-search
41
+ licenses:
42
+ - MIT
43
+ metadata: {}
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 2.5.2
61
+ signing_key:
62
+ specification_version: 4
63
+ summary: get instagram data with tag search
64
+ test_files: []