RubyGems - googleplus_markdown - Versions diffs - 0.2.0 - Mend

googleplus_markdown 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 4331b1a528b01eb53f78851457fb0b4b1eb53b18
+  data.tar.gz: fc88d751d7f92b182e608178074b39f1cacf07a2
+SHA512:
+  metadata.gz: a00f492332390828aaba4318faa03f21eb8b4f1a7cc27eeda57b888a95fb852fb6898275dd346149e53546dd517b2784be4df06e65d3b34ea688437f2e66d1c4
+  data.tar.gz: cb92394ff49518c7f9821a2aa42c4ef70dc433fcc2829207b0849ae402721b0f5fd4a3d0145d7cf96d6752f8d9b25292cd5a3cdc7d638b22d857f34fe4a22ee6

data/bin/gplus2markdown ADDED

@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+# gplus2markdown
+#
+# creator: Matthew Graybosch
+# email: matthew@starbreakerseries.com
+# created: 2 February 2015
+#
+# This program treats every command-line argument (ARGV) as
+# the path of a JSON file created from the Google+ stream by
+# Google Takeout. It then runs each file through a converter
+# to generate a Markdown file with YAML front-matter suitable
+# for use as a Jekyll post.
+#
+# Combine this with a shell script that will loop through
+# a directory of JSON files to process multiple files.
+#
+# Just keep in mind that this script has no error handling
+# or data validation logic. Feed it bad JSON and it will
+# die screaming.
+# We need this library to parse JSON data.
+require 'json'
+require 'googleplus_markdown'
+converter = GooglePlusMarkdown.new
+ARGV.each do | file |
+  puts file
+  raw_post = File.read(file)
+  data = JSON.parse(raw_post)
+  converter.convert(data)
+end

data/lib/googleplus_markdown.rb ADDED

@@ -0,0 +1,230 @@
+# googleplus_markdown.rb
+#
+# creator: Matthew Graybosch
+# email: matthew@starbreakerseries.com
+# created: 9 February 2015
+require 'date'
+require 'json'
+class GooglePlusMarkdown
+  def convert(data)
+    unless data['verb'] == "share" || data['access']['accessSummary'] == "Shared privately"
+      # Let's declare the output string.
+      markdown = ""
+      # I need the creation date so I can set the file name.
+      # Of course, Google can't just call it a 'created' date.
+      date_created = data['published']
+      file_name = "#{date_created}.md"
+      # Instead of doing multiple writes to a file, I'm going to
+      # build a string that I can pass around to various functions.
+      # When I'm done, I'll write *that* to the file.
+      markdown.concat(generate_front_matter(data))
+      # I'm getting the raw text of the post, with Google+'s half-assed
+      # implementation of Textile for markup.
+      markdown.concat("#{retrieve_post_content(data)}\n\n")
+      # Let's get some photos tacked onto the end, shall we?
+      markdown.concat("#{attach_images(data)}\n")
+      # If anybody replied to these posts, let's retrieve the replies
+      # and tack them onto the end. Some of them are actually worth a damn.
+      replies = data['object']['replies']['items']
+      unless(replies == nil)
+        markdown.concat("#{retrieve_replies(replies)}")
+      end
+      handle_file_io(markdown, file_name)
+    end
+  end
+  private
+  def generate_front_matter(data)
+    content = ""
+    attachment = Hash.new
+    unless data['object']['attachments'] == nil
+      attachment = data['object']['attachments'][0]
+    end
+    # you need this before and after a YAML front matter block.
+    yaml_delimiter = "---"
+    # Google+ post titles are wonky. Let's just take the first line.
+    # by default. You can always edit the markdown file afterward.
+    # I'm also stripping out the newline character at the end.
+    if attachment != nil && attachment['displayName']
+      post_title = attachment['displayName']
+    elsif data['title'].length != 0
+      post_title = data['title'].lines.first.delete!("\n")
+    else
+      post_title = "No Title Available"
+    end
+    # I'm getting the raw text of the post, with Google+'s half-assed
+    # implementation of Textile for markup.
+    post_content = data['object']['originalContent']
+    # We'll use the first 150 charcters as the post excerpt.
+    # If you use the Markdown output in Jekyll, it'll put this
+    # in the description meta tag, which used to be good for SEO.
+    # Maybe it still is, but I don't get paid to give a shit.
+    # We'll replace newlines with spaces here. Edit as needed.
+    if post_content != nil
+      post_excerpt = post_content[0..150].gsub("\n", " ")
+    elsif attachment != nil && attachment['objectType'] == "video"
+      post_excerpt = attachment['displayName']
+    else
+      post_excerpt = "No Excerpt Available"
+    end
+    # Let's clean up the excerpt a bit.
+    processed_excerpt = process_post_content(post_excerpt)
+    # Google can't use "created" and "modified"; that would
+    # make too much sense. They used "published" and "updated"
+    # instead, respectively. We'll use the published date
+    # to name the Markdown output file, and include it in the
+    # YAML front matter. If the updated date is different,
+    # I'll include that too.
+    #
+    # Of course, you'll want to rename each markdown file to match
+    # the format specified in the Jekyll documentation at
+    # http://jekyllrb.com/docs/posts/. Otherwise, your permalinks
+    # will probably look like shit.
+    date_created = data['published']
+    date_modified = data['updated']
+    # let's build the output string now.
+    content.concat("#{yaml_delimiter}\n")
+    content.concat("layout: post\n")
+    content.concat("title: \"#{post_title}\"\n")
+    content.concat("categories: blog\n")
+    content.concat("excerpt: \"#{processed_excerpt}\"\n")
+    # Let's see if there's an article attached.
+    unless(attachment == nil)
+      object_type = attachment['objectType']
+      case object_type
+      when "video"
+        video_url = attachment['url']
+        content.concat("link: \"#{video_url}\"\n")
+      when "article"
+        article_url = attachment['url']
+        article_title = attachment['displayName']
+        content.concat("link: \"#{article_url}\"\n")
+        content.concat("linkTitle: \"#{article_title}\"\n")
+      end
+    end
+    # Check for an updated date and add it if it exists.
+    content.concat("date: #{date_created}\n")
+    unless(date_created == date_modified)
+      content.concat("updated: #{date_modified}\n")
+    end
+    content.concat("#{yaml_delimiter}\n")
+    return content
+  end
+  def process_post_content(content)
+    # Pretty much the only thing Google+ took from Textile markup
+    # is their notation for bold and italic text. I'm converting it
+    # to Markdown style here in a quick and dirty 2 stage process.
+    content.gsub!("\*", "\*\*")
+    content.gsub!("\_", "\*")
+    # Google+ mentions are complicated. They resolve to names when
+    # rendered in the desktop or mobile app, but are stored as
+    # 21-digit numbers prefixed with an @ symbol. I can convert these
+    # to Markdown-formatted links with URLs you can paste into your
+    # browser to actually get the names.
+    mentions = content.scan(/\@(?:[0-9]{21})/).flatten
+    mentions.each { |m| content.gsub!(m, "[SOMEBODY](https://plus.google.com/#{m.gsub("@", "")})") }
+    return content
+  end
+  def retrieve_post_content(data)
+    post_content = data['object']['originalContent']
+    attachment = Hash.new
+    unless data['object']['attachments'] == nil
+      attachment = data['object']['attachments'][0]
+    end
+    if post_content != nil
+      # Processing the content gets complicated when you factor
+      # in mentions. We'll define a function for this.
+      processed_content = process_post_content(post_content)
+    elsif attachment['objectType'] == "video"
+      processed_content = data['annotation']
+    else
+      processed_content = "No Post Text Available"
+    end
+    return processed_content
+  end
+  def retrieve_replies(replies)
+    content = "\#\# Comments from Google+ users\n\n"
+    replies.each do |reply|
+      name = reply['actor']['displayName']
+      profile_url = reply['actor']['url']
+      reply_date = Date.parse(reply['published'])
+      reply_text = reply['object']['content']
+      content.concat("\#\#\#\#[#{name}](#{profile_url})\n")
+      content.concat("**posted on #{reply_date.strftime("%A, %m/%d/%Y at %I:%M%P %Z")}**\n\n")
+      content.concat("#{reply_text}\n\n")
+    end
+    return content
+  end
+  def attach_images(data)
+    content = ""
+    attachments = data['object']['attachments']
+    unless(attachments == nil)
+      attachments.each do |attachment|
+        object_type = attachment['objectType']
+        unless(object_type != "photo")
+          id = attachment['id']
+          url = attachment['fullImage']['url']
+          content.concat("![#{id}](#{url})")
+        end
+      end
+    end
+    return content
+  end
+  def handle_file_io(content, filename)
+    # Now the fun begins. First, let's see if the output directory exists.
+    # If it doesn't, create it.
+    output_dir = "./markdown"
+    unless(Dir.exist?(output_dir))
+      Dir.mkdir(output_dir)
+    end
+    # Generate the file path.
+    file_path = File.join(output_dir, filename)
+    # Now let's open the file to write...
+    output = File.open(file_path, "w")
+    # Put in the post content, close the file, and we're done.
+    output.puts(content)
+    output.close
+  end
+end

metadata ADDED

@@ -0,0 +1,67 @@
+--- !ruby/object:Gem::Specification
+name: googleplus_markdown
+version: !ruby/object:Gem::Version
+  version: 0.2.0
+platform: ruby
+authors:
+- Matthew Graybosch
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-02-10 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: json
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 1.8.2
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.8.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 1.8.2
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.8.2
+description: Use this gem to convert Google+ JSON generated by Google Takeout to Markdown.
+  Includes YAML front-matter for use with Jekyll.
+email: matthew@starbreakerseries.com
+executables:
+- gplus2markdown
+extensions: []
+extra_rdoc_files: []
+files:
+- bin/gplus2markdown
+- lib/googleplus_markdown.rb
+homepage: https://github.com/demifiend/gplus2markdown
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.5
+signing_key:
+specification_version: 4
+summary: Converts Google+ JSON to Markdown
+test_files: []