googleplus_markdown 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/gplus2markdown +33 -0
- data/lib/googleplus_markdown.rb +230 -0
- metadata +67 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4331b1a528b01eb53f78851457fb0b4b1eb53b18
|
4
|
+
data.tar.gz: fc88d751d7f92b182e608178074b39f1cacf07a2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a00f492332390828aaba4318faa03f21eb8b4f1a7cc27eeda57b888a95fb852fb6898275dd346149e53546dd517b2784be4df06e65d3b34ea688437f2e66d1c4
|
7
|
+
data.tar.gz: cb92394ff49518c7f9821a2aa42c4ef70dc433fcc2829207b0849ae402721b0f5fd4a3d0145d7cf96d6752f8d9b25292cd5a3cdc7d638b22d857f34fe4a22ee6
|
data/bin/gplus2markdown
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# gplus2markdown
|
4
|
+
#
|
5
|
+
# creator: Matthew Graybosch
|
6
|
+
# email: matthew@starbreakerseries.com
|
7
|
+
# created: 2 February 2015
|
8
|
+
#
|
9
|
+
# This program reads each path given in ARGV as a JSON file
|
10
|
+
# created from the Google+ stream by Google Takeout. It
|
11
|
+
# then runs each file through a converter to generate a
|
12
|
+
# Markdown file with YAML front-matter suitable for use
|
13
|
+
# as a Jekyll post.
|
14
|
+
#
|
15
|
+
# Combine this with a shell script that will loop through
|
16
|
+
# a directory of JSON files to process multiple files.
|
17
|
+
#
|
18
|
+
# Just keep in mind that this script has no error handling
|
19
|
+
# or data validation logic. Feed it bad JSON and it will
|
20
|
+
# die screaming.
|
21
|
+
|
22
|
+
# We need this library to parse JSON data.
|
23
|
+
require 'json'
|
24
|
+
require 'googleplus_markdown'
|
25
|
+
|
26
|
+
# A single converter instance is reused for every input file.
gplus_converter = GooglePlusMarkdown.new

# Walk the paths given on the command line: echo the path, parse the file's
# contents as JSON, and hand the parsed document to the converter.
# There is intentionally no error handling here; malformed JSON or an
# unreadable file aborts the run.
ARGV.each do |json_path|
  puts json_path
  parsed_post = JSON.parse(File.read(json_path))
  gplus_converter.convert(parsed_post)
end
|
@@ -0,0 +1,230 @@
|
|
1
|
+
# googleplus_markdown.rb
|
2
|
+
#
|
3
|
+
# creator: Matthew Graybosch
|
4
|
+
# email: matthew@starbreakerseries.com
|
5
|
+
# created: 9 February 2015
|
6
|
+
require 'date'
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
# Converts one Google+ activity (a parsed Google Takeout JSON document, i.e. a
# Hash with string keys) into a Markdown file with YAML front matter suitable
# for use as a Jekyll post.
#
# Output files are written to ./markdown (relative to the current working
# directory) and are named after the activity's 'published' timestamp.
class GooglePlusMarkdown
  # Directory that receives the generated Markdown files.
  OUTPUT_DIR = './markdown'

  # Line that opens and closes a YAML front matter block.
  YAML_DELIMITER = '---'

  # Maximum number of characters of post text used for the excerpt.
  EXCERPT_LENGTH = 150

  # Google+ stores mentions as an @ sign followed by a 21-digit profile id.
  MENTION_PATTERN = /@([0-9]{21})/

  # strftime format used for the "posted on" line of each reply.
  REPLY_DATE_FORMAT = '%A, %m/%d/%Y at %I:%M%P %Z'

  # Converts a single activity and writes the result to disk.
  #
  # Reshares (verb == "share") and privately shared posts are skipped and
  # nothing is written for them.
  #
  # @param data [Hash] parsed Takeout JSON for one activity
  # @return [nil]
  def convert(data)
    return if skip_post?(data)

    # Build the whole document in memory and write it in one go.
    markdown = generate_front_matter(data)
    markdown << "#{retrieve_post_content(data)}\n\n"
    markdown << "#{attach_images(data)}\n"

    # Replies are optional; posts nobody commented on may have no 'replies'
    # entry at all, so every level is guarded against nil.
    replies = (post_object(data)['replies'] || {})['items']
    markdown << retrieve_replies(replies) if replies

    handle_file_io(markdown, "#{data['published']}.md")
  end

  private

  # @return [Boolean] true for reshares and for posts shared privately
  def skip_post?(data)
    access_summary = (data['access'] || {})['accessSummary']
    data['verb'] == 'share' || access_summary == 'Shared privately'
  end

  # @return [Hash] the activity's 'object' section, or an empty Hash if absent
  def post_object(data)
    data['object'] || {}
  end

  # @return [Hash] the first attachment, or an empty Hash when the post has no
  #   attachments (missing key or empty list)
  def first_attachment(data)
    attachments = post_object(data)['attachments'] || []
    attachments.first || {}
  end

  # Builds the YAML front matter block (including both delimiters).
  #
  # Google calls the creation date 'published' and the modification date
  # 'updated'. The published date is always emitted; the updated date only
  # when it is present and differs.
  #
  # @param data [Hash] parsed Takeout JSON for one activity
  # @return [String] front matter text, terminated by a newline
  def generate_front_matter(data)
    attachment = first_attachment(data)
    published = data['published']
    updated = data['updated']

    lines = [
      YAML_DELIMITER,
      'layout: post',
      "title: #{yaml_quote(post_title(data, attachment))}",
      'categories: blog',
      "excerpt: #{yaml_quote(post_excerpt(data, attachment))}"
    ]
    lines.concat(link_lines(attachment))
    lines << "date: #{published}"
    lines << "updated: #{updated}" if updated && updated != published
    lines << YAML_DELIMITER

    lines.map { |line| "#{line}\n" }.join
  end

  # Picks the post title: the attachment's display name if there is one,
  # otherwise the first line of the activity title, otherwise a placeholder.
  def post_title(data, attachment)
    return attachment['displayName'] if attachment['displayName']

    title = data['title'].to_s
    return 'No Title Available' if title.empty?

    # chomp (not delete!) so a title without a trailing newline is kept;
    # delete! returns nil when it removes nothing.
    title.lines.first.chomp
  end

  # Picks the excerpt: the first EXCERPT_LENGTH characters of the post text
  # (newlines replaced by spaces), the video's display name for text-less
  # video posts, or a placeholder. The result is run through
  # process_post_content to convert markup and mentions.
  def post_excerpt(data, attachment)
    original = post_object(data)['originalContent']
    excerpt =
      if original
        original[0, EXCERPT_LENGTH].gsub("\n", ' ')
      elsif attachment['objectType'] == 'video' && attachment['displayName']
        attachment['displayName']
      else
        'No Excerpt Available'
      end

    process_post_content(excerpt)
  end

  # Front matter lines describing an attached video or article, if any.
  #
  # @return [Array<String>] zero, one or two lines without trailing newlines
  def link_lines(attachment)
    case attachment['objectType']
    when 'video'
      ["link: #{yaml_quote(attachment['url'])}"]
    when 'article'
      ["link: #{yaml_quote(attachment['url'])}",
       "linkTitle: #{yaml_quote(attachment['displayName'])}"]
    else
      []
    end
  end

  # Wraps text in double quotes for use as a YAML scalar, escaping embedded
  # backslashes and double quotes so the front matter stays parseable.
  def yaml_quote(text)
    escaped = text.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
    "\"#{escaped}\""
  end

  # Converts Google+ markup to Markdown in a quick and dirty two-stage pass
  # (*bold* becomes **bold**, then _italic_ becomes *italic*) and turns
  # @<21-digit id> mentions into links to the mentioned profile.
  #
  # The argument is NOT modified; a new String is returned. This matters
  # because callers pass in strings that also live in the input Hash or are
  # reused for other fields (e.g. the title).
  #
  # @param content [String, nil] raw post text
  # @return [String] converted text (empty for nil input)
  def process_post_content(content)
    converted = content.to_s.gsub('*', '**').gsub('_', '*')
    converted.gsub(MENTION_PATTERN) do
      "[SOMEBODY](https://plus.google.com/#{Regexp.last_match(1)})"
    end
  end

  # Returns the body text of the post: the converted original content, the
  # annotation for text-less video posts, or a placeholder.
  #
  # @param data [Hash] parsed Takeout JSON for one activity
  # @return [String, nil] nil only when a video post has no annotation
  def retrieve_post_content(data)
    original = post_object(data)['originalContent']
    return process_post_content(original) if original
    return data['annotation'] if first_attachment(data)['objectType'] == 'video'

    'No Post Text Available'
  end

  # Renders the replies as a "Comments" section, one sub-heading per reply.
  #
  # @param replies [Array<Hash>] the 'items' list of the activity's replies
  # @return [String] Markdown for the comments section
  def retrieve_replies(replies)
    sections = replies.map do |reply|
      name = reply['actor']['displayName']
      profile_url = reply['actor']['url']
      # DateTime (not Date) so the time of day survives parsing; with Date
      # every reply would be reported as posted at 12:00am.
      posted_at = DateTime.parse(reply['published'])
      reply_text = reply['object']['content']

      "####[#{name}](#{profile_url})\n" \
        "**posted on #{posted_at.strftime(REPLY_DATE_FORMAT)}**\n\n" \
        "#{reply_text}\n\n"
    end

    "## Comments from Google+ users\n\n#{sections.join}"
  end

  # Renders every photo attachment as a Markdown image, using the attachment
  # id as alt text. Images are concatenated without a separator.
  #
  # @param data [Hash] parsed Takeout JSON for one activity
  # @return [String] Markdown image tags, or an empty String without photos
  def attach_images(data)
    attachments = post_object(data)['attachments'] || []
    photos = attachments.select { |attachment| attachment['objectType'] == 'photo' }
    photos.map { |photo| "![#{photo['id']}](#{photo['fullImage']['url']})" }.join
  end

  # Writes content to OUTPUT_DIR/filename, creating the directory if needed.
  # An existing file of the same name is overwritten.
  #
  # @param content [String] complete Markdown document
  # @param filename [String] file name inside OUTPUT_DIR
  # @return [nil]
  def handle_file_io(content, filename)
    Dir.mkdir(OUTPUT_DIR) unless Dir.exist?(OUTPUT_DIR)

    # Block form guarantees the handle is closed even if the write raises.
    File.open(File.join(OUTPUT_DIR, filename), 'w') { |output| output.puts(content) }
  end
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: googleplus_markdown
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew Graybosch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-02-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: json
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.8.2
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.8.2
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.8.2
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.8.2
|
33
|
+
description: Use this gem to convert Google+ JSON generated by Google Takeout to Markdown.
|
34
|
+
Includes YAML front-matter for use with Jekyll.
|
35
|
+
email: matthew@starbreakerseries.com
|
36
|
+
executables:
|
37
|
+
- gplus2markdown
|
38
|
+
extensions: []
|
39
|
+
extra_rdoc_files: []
|
40
|
+
files:
|
41
|
+
- bin/gplus2markdown
|
42
|
+
- lib/googleplus_markdown.rb
|
43
|
+
homepage: https://github.com/demifiend/gplus2markdown
|
44
|
+
licenses:
|
45
|
+
- MIT
|
46
|
+
metadata: {}
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
requirements: []
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 2.4.5
|
64
|
+
signing_key:
|
65
|
+
specification_version: 4
|
66
|
+
summary: Converts Google+ JSON to Markdown
|
67
|
+
test_files: []
|