googleplus_markdown 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4331b1a528b01eb53f78851457fb0b4b1eb53b18
4
+ data.tar.gz: fc88d751d7f92b182e608178074b39f1cacf07a2
5
+ SHA512:
6
+ metadata.gz: a00f492332390828aaba4318faa03f21eb8b4f1a7cc27eeda57b888a95fb852fb6898275dd346149e53546dd517b2784be4df06e65d3b34ea688437f2e66d1c4
7
+ data.tar.gz: cb92394ff49518c7f9821a2aa42c4ef70dc433fcc2829207b0849ae402721b0f5fd4a3d0145d7cf96d6752f8d9b25292cd5a3cdc7d638b22d857f34fe4a22ee6
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # gplus2markdown
4
+ #
5
+ # creator: Matthew Graybosch
6
+ # email: matthew@starbreakerseries.com
7
+ # created: 2 February 2015
8
+ #
9
+ # This program reads each path given in ARGV as a JSON file
10
+ # created from the Google+ stream by Google Takeout. It
11
+ # then runs each file through a converter to generate a
12
+ # Markdown file with YAML front-matter suitable for use
13
+ # as a Jekyll post.
14
+ #
15
+ # Combine this with a shell script that will loop through
16
+ # a directory of JSON files to process multiple files.
17
+ #
18
+ # Just keep in mind that this script has no error handling
19
+ # or data validation logic. Feed it bad JSON and it will
20
+ # die screaming.
21
+
22
+ # We need this library to parse JSON data.
23
+ require 'json'
24
+ require 'googleplus_markdown'
25
+
26
# One converter instance is shared by every file named on the command line.
converter = GooglePlusMarkdown.new

# For each path: echo it, then read, parse and convert the file.
# Nothing is rescued here, so an unreadable file or malformed JSON
# aborts the whole run.
ARGV.each do |path|
  puts path
  converter.convert(JSON.parse(File.read(path)))
end
@@ -0,0 +1,230 @@
1
+ # googleplus_markdown.rb
2
+ #
3
+ # creator: Matthew Graybosch
4
+ # email: matthew@starbreakerseries.com
5
+ # created: 9 February 2015
6
+ require 'date'
7
+ require 'json'
8
+
9
# Converts one parsed Google+ post (a Hash, e.g. the result of JSON.parse on
# a Google Takeout file) into a Markdown file with YAML front matter that is
# suitable for use as a Jekyll post.
class GooglePlusMarkdown
  # Directory, relative to the current working directory, that receives output.
  OUTPUT_DIR = "./markdown"

  # Line that opens and closes a YAML front-matter block.
  YAML_DELIMITER = "---"

  # Number of characters of the post body used for the excerpt.
  EXCERPT_LENGTH = 150

  # A mention as it is stored in the post text: "@" followed by 21 digits.
  MENTION_PATTERN = /@([0-9]{21})/

  # strftime format used for the timestamp shown above each reply.
  REPLY_DATE_FORMAT = "%A, %m/%d/%Y at %I:%M%P %Z"

  # Writes "<OUTPUT_DIR>/<published>.md" for the given post.
  #
  # Posts whose 'verb' is "share" and posts whose access summary is
  # "Shared privately" are skipped and nothing is written.
  #
  # @param data [Hash] the parsed post, using string keys
  # @return [nil]
  def convert(data)
    return if data['verb'] == "share"
    return if data.dig('access', 'accessSummary') == "Shared privately"

    # The published timestamp doubles as the output file name.
    file_name = "#{data['published']}.md"

    # Build the whole document in memory, then write it in one go.
    sections = [
      generate_front_matter(data),
      "#{retrieve_post_content(data)}\n\n",
      "#{attach_images(data)}\n"
    ]

    # Only add the comments section when there is at least one reply.
    replies = data.dig('object', 'replies', 'items')
    sections << retrieve_replies(replies) unless replies.nil? || replies.empty?

    handle_file_io(sections.join, file_name)
    nil
  end

  private

  # Builds the YAML front-matter block (both delimiter lines included).
  #
  # @param data [Hash] the parsed post
  # @return [String] front matter, every line terminated by "\n"
  def generate_front_matter(data)
    attachment = first_attachment(data)
    published = data['published']
    updated = data['updated']

    lines = [
      YAML_DELIMITER,
      "layout: post",
      "title: \"#{yaml_escape(post_title(data, attachment))}\"",
      "categories: blog",
      "excerpt: \"#{yaml_escape(post_excerpt(data, attachment))}\""
    ]
    lines.concat(link_lines(attachment))
    lines << "date: #{published}"
    # Only mention the update when one is recorded and it differs.
    lines << "updated: #{updated}" if updated && updated != published
    lines << YAML_DELIMITER

    lines.map { |line| "#{line}\n" }.join
  end

  # Returns the first attachment of the post, or an empty Hash when the post
  # has none, so callers can index it without nil checks.
  def first_attachment(data)
    (data.dig('object', 'attachments') || []).first || {}
  end

  # Picks the title: the attachment's display name when it has one, otherwise
  # the first line of the post's 'title', otherwise a placeholder.
  def post_title(data, attachment)
    display_name = attachment['displayName']
    return display_name if display_name && !display_name.empty?

    # chomp (unlike delete!) still returns the line when it has no newline.
    first_line = data['title'].to_s.lines.first.to_s.chomp
    first_line.empty? ? "No Title Available" : first_line
  end

  # Picks the excerpt: the start of the post body with newlines turned into
  # spaces, a video attachment's display name, or a placeholder. The result
  # is run through process_post_content.
  def post_excerpt(data, attachment)
    body = data.dig('object', 'originalContent')
    raw =
      if body
        body[0, EXCERPT_LENGTH].gsub("\n", " ")
      elsif attachment['objectType'] == "video" && attachment['displayName']
        attachment['displayName']
      else
        "No Excerpt Available"
      end

    process_post_content(raw)
  end

  # Front-matter lines describing a linked video or article; empty for any
  # other attachment type (or no attachment).
  def link_lines(attachment)
    case attachment['objectType']
    when "video"
      ["link: \"#{attachment['url']}\""]
    when "article"
      [
        "link: \"#{attachment['url']}\"",
        "linkTitle: \"#{yaml_escape(attachment['displayName'])}\""
      ]
    else
      []
    end
  end

  # Escapes backslashes and double quotes so the text can sit inside a
  # double-quoted YAML scalar.
  def yaml_escape(text)
    text.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
  end

  # Converts the post's bold/italic notation to Markdown and turns mentions
  # into links. Quick and dirty: every "*" becomes "**" and every "_"
  # becomes "*", wherever it appears.
  #
  # The argument is never modified; a new String is returned.
  #
  # @param content [String] raw post text
  # @return [String] the converted text
  def process_post_content(content)
    content
      .gsub("*", "**")
      .gsub("_", "*")
      .gsub(MENTION_PATTERN, '[SOMEBODY](https://plus.google.com/\1)')
  end

  # Returns the body text for the post: the processed original content, the
  # post's 'annotation' for a video attachment (may be nil), or a placeholder.
  def retrieve_post_content(data)
    body = data.dig('object', 'originalContent')
    return process_post_content(body) if body
    return data['annotation'] if first_attachment(data)['objectType'] == "video"

    "No Post Text Available"
  end

  # Renders the replies as a Markdown section: a heading, then for each reply
  # the author link, the time it was posted and its text.
  #
  # @param replies [Array<Hash>] reply objects from the post
  # @return [String]
  # @raise [TypeError, Date::Error] when a reply's 'published' value is
  #   missing or cannot be parsed
  def retrieve_replies(replies)
    entries = replies.map do |reply|
      # DateTime (not Date) so the hour and minute survive parsing.
      posted_at = DateTime.parse(reply['published'])

      "####[#{reply.dig('actor', 'displayName')}](#{reply.dig('actor', 'url')})\n" \
        "**posted on #{posted_at.strftime(REPLY_DATE_FORMAT)}**\n\n" \
        "#{reply.dig('object', 'content')}\n\n"
    end

    "## Comments from Google+ users\n\n" + entries.join
  end

  # Returns Markdown image tags for every "photo" attachment, concatenated
  # with no separator; an empty String when there are none.
  def attach_images(data)
    attachments = data.dig('object', 'attachments') || []

    attachments
      .select { |attachment| attachment['objectType'] == "photo" }
      .map { |photo| "![#{photo['id']}](#{photo.dig('fullImage', 'url')})" }
      .join
  end

  # Writes content to OUTPUT_DIR/filename, creating the directory first when
  # it does not exist. The block form of File.open closes the file even if
  # the write raises.
  def handle_file_io(content, filename)
    Dir.mkdir(OUTPUT_DIR) unless Dir.exist?(OUTPUT_DIR)

    File.open(File.join(OUTPUT_DIR, filename), "w") { |file| file.puts(content) }
  end
end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: googleplus_markdown
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Graybosch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.8.2
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.8.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 1.8.2
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.8.2
33
+ description: Use this gem to convert Google+ JSON generated by Google Takeout to Markdown.
34
+ Includes YAML front-matter for use with Jekyll.
35
+ email: matthew@starbreakerseries.com
36
+ executables:
37
+ - gplus2markdown
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - bin/gplus2markdown
42
+ - lib/googleplus_markdown.rb
43
+ homepage: https://github.com/demifiend/gplus2markdown
44
+ licenses:
45
+ - MIT
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project:
63
+ rubygems_version: 2.4.5
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Converts Google+ JSON to Markdown
67
+ test_files: []