googleplus_markdown 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4331b1a528b01eb53f78851457fb0b4b1eb53b18
4
+ data.tar.gz: fc88d751d7f92b182e608178074b39f1cacf07a2
5
+ SHA512:
6
+ metadata.gz: a00f492332390828aaba4318faa03f21eb8b4f1a7cc27eeda57b888a95fb852fb6898275dd346149e53546dd517b2784be4df06e65d3b34ea688437f2e66d1c4
7
+ data.tar.gz: cb92394ff49518c7f9821a2aa42c4ef70dc433fcc2829207b0849ae402721b0f5fd4a3d0145d7cf96d6752f8d9b25292cd5a3cdc7d638b22d857f34fe4a22ee6
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # gplus2markdown
4
+ #
5
+ # creator: Matthew Graybosch
6
+ # email: matthew@starbreakerseries.com
7
+ # created: 2 February 2015
8
+ #
9
+ # This program reads each path given in ARGV as a JSON file
10
+ # created from the Google+ stream by Google Takeout. It
11
+ # then runs each file through a converter to generate a
12
+ # Markdown file with YAML front-matter suitable for use
13
+ # as a Jekyll post.
14
+ #
15
+ # Combine this with a shell script that will loop through
16
+ # a directory of JSON files to process multiple files.
17
+ #
18
+ # Just keep in mind that this script has no error handling
19
+ # or data validation logic. Feed it bad JSON and it will
20
+ # die screaming.
21
+
22
+ # We need this library to parse JSON data.
23
+ require 'json'
24
+ require 'googleplus_markdown'
25
+
26
# One converter instance is shared by every file named on the command line.
markdown_converter = GooglePlusMarkdown.new

# For each path: echo it, load the file, parse it as JSON and hand the
# resulting Hash to the converter. Nothing is rescued here, so unreadable
# files or invalid JSON abort the run with the underlying exception.
ARGV.each do |json_path|
  puts json_path
  markdown_converter.convert(JSON.parse(File.read(json_path)))
end
@@ -0,0 +1,230 @@
1
+ # googleplus_markdown.rb
2
+ #
3
+ # creator: Matthew Graybosch
4
+ # email: matthew@starbreakerseries.com
5
+ # created: 9 February 2015
6
+ require 'date'
7
+ require 'json'
8
+
9
# Converts one Google+ post (the Hash parsed from a Google Takeout JSON file)
# into a Markdown file with YAML front matter suitable for a Jekyll post.
#
# The only public entry point is #convert; every other method is a private
# helper that builds one section of the output string. None of the helpers
# modify the Hash they are given.
class GooglePlusMarkdown
  # Directory, relative to the current working directory, that receives the
  # generated Markdown files.
  OUTPUT_DIR = "./markdown".freeze

  # Marker line required before and after a YAML front matter block.
  YAML_DELIMITER = "---".freeze

  # Maximum number of characters of post text used for the excerpt.
  EXCERPT_LENGTH = 150

  # Builds the Markdown for one post and writes it to
  # ./markdown/<published>.md.
  #
  # Reshares (verb == "share") and posts whose access summary is
  # "Shared privately" are skipped; nothing is written for them.
  #
  # Of course, you'll want to rename each markdown file to match the format
  # specified in the Jekyll documentation at http://jekyllrb.com/docs/posts/.
  #
  # @param data [Hash] parsed Takeout JSON for a single post
  # @return [nil]
  def convert(data)
    return if data['verb'] == "share"
    # 'access' is looked up with dig so a post without that key is treated
    # as public instead of raising NoMethodError.
    return if data.dig('access', 'accessSummary') == "Shared privately"

    # Instead of doing multiple writes to a file, build one string, pass it
    # through the helpers, and write *that* at the end.
    markdown = generate_front_matter(data)
    markdown << "#{retrieve_post_content(data)}\n\n"
    markdown << "#{attach_images(data)}\n"

    # Replies are optional; append them only when the post has some.
    replies = data.dig('object', 'replies', 'items')
    markdown << retrieve_replies(replies) if replies

    # Google calls the creation date 'published'; it names the output file.
    handle_file_io(markdown, "#{data['published']}.md")
    nil
  end

  private

  # Returns the first attachment of the post, or an empty Hash when the post
  # has no attachments (missing key or empty array).
  def first_attachment(data)
    Array(data.dig('object', 'attachments')).first || {}
  end

  # Escapes backslashes and double quotes so the value can be embedded in a
  # double-quoted YAML scalar. nil becomes the empty string.
  def yaml_escape(value)
    value.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
  end

  # Builds the YAML front matter block, including both delimiter lines.
  #
  # Emits layout, title, categories and excerpt; a link (plus linkTitle for
  # articles) when the first attachment is a video or an article; the
  # published date; and the updated date when it differs from published.
  #
  # @param data [Hash] parsed Takeout JSON for a single post
  # @return [String] front matter terminated by a newline
  def generate_front_matter(data)
    attachment = first_attachment(data)
    date_created = data['published']
    date_modified = data['updated']

    lines = [
      YAML_DELIMITER,
      "layout: post",
      "title: \"#{yaml_escape(post_title(data, attachment))}\"",
      "categories: blog",
      "excerpt: \"#{yaml_escape(post_excerpt(data, attachment))}\""
    ]

    # Let's see if there's a video or an article attached.
    case attachment['objectType']
    when "video"
      lines << "link: \"#{yaml_escape(attachment['url'])}\""
    when "article"
      lines << "link: \"#{yaml_escape(attachment['url'])}\""
      lines << "linkTitle: \"#{yaml_escape(attachment['displayName'])}\""
    end

    lines << "date: #{date_created}"
    lines << "updated: #{date_modified}" unless date_created == date_modified
    lines << YAML_DELIMITER

    lines.join("\n") + "\n"
  end

  # Picks the post title: the first attachment's displayName when present,
  # otherwise the first line of the post's own title (line ending removed),
  # otherwise a placeholder.
  def post_title(data, attachment)
    return attachment['displayName'] if attachment['displayName']

    title = data['title'].to_s
    return "No Title Available" if title.empty?

    # chomp always returns a String; the destructive delete! returns nil
    # when there is nothing to remove, which blanked single-line titles.
    title.lines.first.chomp
  end

  # Picks the excerpt: the first EXCERPT_LENGTH characters of the post text
  # with newlines replaced by spaces, or the video attachment's displayName,
  # or a placeholder. The result is run through #process_post_content.
  def post_excerpt(data, attachment)
    post_content = data.dig('object', 'originalContent')

    raw_excerpt =
      if post_content
        post_content[0, EXCERPT_LENGTH].gsub("\n", " ")
      elsif attachment['objectType'] == "video" && attachment['displayName']
        attachment['displayName']
      else
        "No Excerpt Available"
      end

    process_post_content(raw_excerpt)
  end

  # Converts Google+ markup to Markdown and returns a NEW string; the
  # argument is left untouched.
  #
  # * `*bold*` becomes `**bold**` and `_italic_` becomes `*italic*`. The
  #   order matters: asterisks are doubled before underscores are turned
  #   into asterisks.
  # * Mentions, stored as "@" followed by a 21-digit number, become Markdown
  #   links to https://plus.google.com/<number> labelled SOMEBODY.
  #
  # @param content [String] raw Google+ text
  # @return [String] Markdown text
  def process_post_content(content)
    converted = content.gsub("*", "**").gsub("_", "*")
    converted.gsub(/@([0-9]{21})/) do
      "[SOMEBODY](https://plus.google.com/#{Regexp.last_match(1)})"
    end
  end

  # Returns the body text of the post: the processed original content when
  # present, the post's annotation when the first attachment is a video, or
  # a placeholder otherwise.
  def retrieve_post_content(data)
    post_content = data.dig('object', 'originalContent')
    return process_post_content(post_content) if post_content
    return data['annotation'] if first_attachment(data)['objectType'] == "video"

    "No Post Text Available"
  end

  # Renders the replies as a "Comments from Google+ users" section, one
  # heading, timestamp line and body per reply.
  #
  # @param replies [Array<Hash>] the post's reply items
  # @return [String]
  def retrieve_replies(replies)
    sections = replies.map do |reply|
      name = reply.dig('actor', 'displayName')
      profile_url = reply.dig('actor', 'url')
      reply_text = reply.dig('object', 'content')
      # DateTime (not Date) keeps the time of day and offset, which the
      # format string below prints.
      posted_at = DateTime.parse(reply['published'])
                          .strftime("%A, %m/%d/%Y at %I:%M%P %Z")

      "####[#{name}](#{profile_url})\n" \
        "**posted on #{posted_at}**\n\n" \
        "#{reply_text}\n\n"
    end

    "## Comments from Google+ users\n\n#{sections.join}"
  end

  # Returns Markdown image tags, concatenated without a separator, for every
  # attachment whose objectType is "photo". Returns "" when there are none.
  def attach_images(data)
    photos = Array(data.dig('object', 'attachments')).select do |attachment|
      attachment['objectType'] == "photo"
    end

    photos.map { |photo| "![#{photo['id']}](#{photo.dig('fullImage', 'url')})" }.join
  end

  # Writes content to OUTPUT_DIR/filename, creating OUTPUT_DIR first when it
  # does not exist. The block form of File.open closes the file even if the
  # write raises.
  def handle_file_io(content, filename)
    Dir.mkdir(OUTPUT_DIR) unless Dir.exist?(OUTPUT_DIR)

    File.open(File.join(OUTPUT_DIR, filename), "w") do |output|
      output.puts(content)
    end
  end
end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: googleplus_markdown
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Graybosch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.8.2
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.8.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 1.8.2
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.8.2
33
+ description: Use this gem to convert Google+ JSON generated by Google Takeout to Markdown.
34
+ Includes YAML front-matter for use with Jekyll.
35
+ email: matthew@starbreakerseries.com
36
+ executables:
37
+ - gplus2markdown
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - bin/gplus2markdown
42
+ - lib/googleplus_markdown.rb
43
+ homepage: https://github.com/demifiend/gplus2markdown
44
+ licenses:
45
+ - MIT
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project:
63
+ rubygems_version: 2.4.5
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Converts Google+ JSON to Markdown
67
+ test_files: []