locomotivecms-freight 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1fc95af0db9dc7fbcca0bfc5b46cefb7d81e54f9
4
+ data.tar.gz: 7d775e2262246f556a7804a0216ee43eb13374c4
5
+ SHA512:
6
+ metadata.gz: 2bb89b63aa9c160f5d7adbab3e40236ee47812b41e45372f41909c3c04920dcd6f0ce3fa496f70b5ad5d2c4fd3685964453a92ae7bbd72fe884a9e2be25d9a94
7
+ data.tar.gz: b09c31d678839c33f5153f9da6bd46c1da4a3161087828c7f59ae947f9bf8703b3d8541a1d6df8ef9e9ed12594e50881c2a31eda9cc39e28c4b5abfba665191d
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.1
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in locomotivecms-freight.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Joel Helbling
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,82 @@
1
+ # Locomotivecms::Freight
2
+
3
+ Imports posts, comments and images from any WordPress site. Rewrites image tags and internal links.
4
+
5
+ ## Installation
6
+
7
+ This gem adds rake tasks to a LocomotiveCMS Wagon-generated project directory.
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'locomotivecms-freight'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install locomotivecms-freight
22
+
23
+ Finally, place this somewhere in your project's Rakefile:
24
+
25
+ ```ruby
26
+ require 'locomotivecms/freight/tasks'
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ First, on your WordPress site, log in as an admin user, and navigate to Tools -> Export. Select
32
+ "All content" and click "Download Export File". Save the file and remember the location.
33
+
34
+ Next, install the LocomotiveCMS content_types needed for WordPress posts and comments:
35
+
36
+ bundle exec rake wp:install_content_types
37
+
38
+ Each post's body will be converted to markdown. After this step, you will need to push the new
39
+ content types to each engine to which you will be deploying:
40
+
41
+ bundle exec wagon sync production -v -r content_types
42
+
43
+ Now that the necessary content_types are in place to receive them, you can import all posts, comments and
44
+ images from a WordPress site by running the following command:
45
+
46
+ bundle exec rake wp:import TARGET=production XML=/path/to/my-wordpress-export.xml
47
+
48
+ Note that `TARGET` should reference one of the environments defined in your `config/deploy.yml` file.
49
+ Also be aware that image importing will not work unless the WordPress site from which you are exporting
50
+ is up and operational. This is because the XML export file from a WordPress site contains pages, posts
51
+ and comments, but not other file resources such as images, audio/video files etc.
52
+
53
+ Also be aware that as of this writing, Freight's only concern is with posts and comments; it ignores
54
+ pages.
55
+
56
+ If there were images downloaded then you will need to push them up to your target LocomotiveCMS engine:
57
+
58
+ bundle exec wagon sync production -v
59
+
60
+ To remove all _imported_ posts and associated comments, run this:
61
+
62
+ bundle exec rake wp:clean TARGET=production
63
+
64
+ And finally, to remove all imported posts and associated comments and then re-import, do this:
65
+
66
+ bundle exec rake wp:reload TARGET=production XML=/path/to/my-wordpress-export.xml
67
+
68
+ ## Development
69
+
70
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
71
+
72
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
73
+
74
+ ## Contributing
75
+
76
+ Bug reports and pull requests are welcome on GitHub at https://github.com/joelhelbling/locomotivecms-freight.
77
+
78
+
79
+ ## License
80
+
81
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
82
+
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+ require "locomotivecms/freight/tasks"
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "rake"
5
+ require "locomotivecms/freight"
6
+ require "locomotivecms/freight/tasks"
7
+
8
+ # You can add fixtures and/or initialization code here to make experimenting
9
+ # with your gem easier. You can also use a different console, if you like.
10
+
11
+ # (If you use this, don't forget to add pry to your Gemfile!)
12
+ # require "pry"
13
+ # Pry.start
14
+
15
+ require "irb"
16
+ IRB.start
data/bin/freight ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path(File.join File.dirname(__FILE__), '..', 'lib', 'locomotivecms', 'freight')
4
+
5
+ wp_export = ARGV[0]
6
+
7
+ puts <<-SORRY
8
+ So sorry, but this script is not yet functional. For now, if you'll just add
9
+
10
+ require 'locomotivecms/freight/tasks'
11
+
12
+ to your wagon project's Rakefile, you should get a slew of useful tasks for importing
13
+ from a WordPress site.
14
+ SORRY
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,67 @@
1
+ name: Comments
2
+ slug: comments
3
+ description: Comments on blog posts
4
+ label_field_name: email
5
+ order_by: created_at
6
+ order_direction: asc
7
+
8
+ # Activate public 'create' API (e.g for a contact form)
9
+ # public_submission_enabled: false
10
+
11
+ # Array of emails to be notified of new entries made with the public API
12
+ # public_submission_accounts: ['john@example.com']
13
+
14
+ # Control the display of the content type in the back-office.
15
+ # display_settings:
16
+ # seo: false # display the SEO tab for the content entries
17
+ # advanced: false # display the Advanced tab for the content entries
18
+ # position: 1 # position in the sidebar menu
19
+ # hidden: false # hidden for authors?
20
+
21
+ entry_template: |
22
+ <a href={{ link }}>{{ entry.name }} &lt;{{ entry._label }}&gt;</a>
23
+ [ {{ entry.commented_at }} ]<br>
24
+ {{ entry.content }}
25
+
26
+ fields:
27
+ - name:
28
+ label: Name
29
+ type: string
30
+ required: false
31
+ hint: the huumon who made the wisecrack
32
+ localized: false
33
+
34
+ - email:
35
+ label: Email
36
+ type: string
37
+ required: false
38
+ hint: commenter's email address
39
+ localized: false
40
+
41
+ - url:
42
+ label: Url
43
+ type: string
44
+ required: false
45
+ hint: the commenter's (optional) homepage url
46
+ localized: false
47
+
48
+ - commented_at:
49
+ label: Commented at
50
+ type: date_time
51
+ required: true
52
+ hint: when what was said, was said, exactly
53
+ localized: false
54
+
55
+ - content:
56
+ label: Comment
57
+ type: text
58
+ required: true
59
+ hint: Explanatory text displayed in the back office
60
+ localized: false
61
+ text_formatting: html
62
+
63
+ - post:
64
+ label: Post
65
+ hint: The blog post this comment was commenting on
66
+ type: belongs_to
67
+ class_name: posts
@@ -0,0 +1,115 @@
1
+ # Human readable name of this type
2
+ name: Posts
3
+
4
+ # Lowercase, underscored handle used to access this type
5
+ slug: posts
6
+
7
+ # Explanatory text displayed in the back-office
8
+ description: Blog post
9
+
10
+ # Slug of field used to identify entries by default, such as the title
11
+ label_field_name: title
12
+
13
+ # Valid values: manually, created_at, updated_at, or the slug of any field
14
+ order_by: posted_at
15
+
16
+ # Valid values: asc (ascending) and desc (descending). Set to asc by default.
17
+ order_direction: desc
18
+
19
+ # Specify a field slug to group entries by that field in the back-office.
20
+ group_by: _visible
21
+
22
+ # Activate public 'create' API (e.g for a contact form)
23
+ # public_submission_enabled: false
24
+
25
+ # Array of emails to be notified of new entries made with the public API
26
+ # public_submission_accounts: ['john@example.com']
27
+
28
+ # Control the display of the content type in the back-office.
29
+ display_settings:
30
+ seo: true # display the SEO tab for the content entries
31
+ advanced: true # display the Advanced tab for the content entries
32
+ # position: 1 # position in the sidebar menu
33
+ # hidden: false # hidden for authors?
34
+
35
+ # By default, the back-office displays the _label property (see label_field_name) of the content entry. This can be modified by writing your own Liquid template below:
36
+ # entry_template: '<a href="{{ link }}">{{ entry._label }}</a>' # The default template
37
+ entry_template: |
38
+ <a href={{ link }}>
39
+ <h4 class=text-info>{{ entry.title }}</h4>
40
+ </a>
41
+ <small><em class=text-muted>{{ entry.posted_at | date: "%I:%M %p, %a, %b %d, %Y" }}</em></small><br />
42
+ <p>{{ entry.teaser }}</p>
43
+
44
+ # A list describing each field
45
+ fields:
46
+ - title:
47
+ label: Title
48
+ type: string
49
+ required: true
50
+ localized: false
51
+
52
+ - keywords:
53
+ label: Keywords
54
+ type: tags
55
+ required: false
56
+ hint: one or more tags or keywords for this post
57
+ localized: false
58
+
59
+ - posted_at:
60
+ label: Posted at
61
+ type: date_time
62
+ required: false
63
+ hint: the date when the article was or will be posted
64
+ localized: false
65
+
66
+ - body:
67
+ label: Body
68
+ type: text
69
+ required: false
70
+ hint: <%= @format == 'markdown' ? 'uses markdown' : 'Use the WYSIWYG, Luke...' %>
71
+ localized: false
72
+ text_formatting: <%= @format %>
73
+
74
+ - teaser:
75
+ label: Teaser / synopsis
76
+ type: text
77
+ required: false
78
+ hint: <%= @format == 'markdown' ? 'uses markdown' : 'Use the WYSIWYG, Luke...' %>
79
+ localized: false
80
+ text_formatting: <%= @format %>
81
+
82
+ - featured_image:
83
+ label: Featured image
84
+ type: file
85
+ required: false
86
+ hint: An image to accompany the heading of the post
87
+ localized: false
88
+
89
+ - open_for_comment: # WordPress: comment_status
90
+ label: Open for comments?
91
+ type: boolean
92
+ required: true
93
+ hint: Allow comments on this post (or don't)
94
+ localized: false
95
+
96
+ # Some other fields to consider (culled from the CJ WP export):
97
+ # - comment_status (open/closed) --make a bool open_for_comment
98
+ # - creator/author
99
+ # - published (boolean)
100
+ # - category (belongs_to) try using tags
101
+
102
+ - source:
103
+ label: Content source
104
+ type: string
105
+ required: false
106
+ hint: For use by import scripts
107
+ localized: false
108
+
109
+ - comments:
110
+ label: Comments
111
+ hint: A description of the relationship for the editors
112
+ type: has_many
113
+ class_name: comments
114
+ inverse_of: post
115
+ ui_enabled: true
@@ -0,0 +1,138 @@
1
+ require 'active_support/core_ext/object/blank'
2
+ require 'ostruct'
3
+
4
+ module Locomotivecms
5
+ module Freight
6
+
7
+ module HtmlToMarkdown
8
+ class << self
9
+ def convert_to_markdown html
10
+ html = convert_italics html
11
+ html = convert_bold html
12
+ html = convert_tables html
13
+ html = convert_preformatted html
14
+ html = convert_paragraphs html
15
+ html = convert_headings html
16
+ html = convert_images html
17
+ html = convert_links html
18
+ html
19
+ end
20
+
21
+ def convert_italics html
22
+ html \
23
+ .gsub(/<(i .*?>|i>|em.*?>)[\n\s]*/i, '_')
24
+ .gsub(/[\n\s]*\<(\/i|\/em)[^\>]*\>/i, '_')
25
+ end
26
+
27
+ def convert_bold html
28
+ html \
29
+ .gsub(/\<(b .*?>|b>|strong.*?>)[\n\s]*/i, '**')
30
+ .gsub(/[\n\s]*\<(\/b|\/strong)[^\>]*\>/i, '**')
31
+ end
32
+
33
+ def convert_preformatted html
34
+ html.gsub(/<pre.*?>.*?<\/pre.*?>/m) do |pre|
35
+ text = pre.match(/<pre.*?>(.*?)<\/pre.*?>/m)[1]
36
+ text \
37
+ .strip
38
+ .split(/\n\n/m)
39
+ .map do |para|
40
+ para.split(/\n/).join("<br />\n")
41
+ end \
42
+ .join("\n\n")
43
+ .chomp
44
+ end.gsub(/\n\n\n/, "\n\n")
45
+ end
46
+
47
+ def convert_tables html
48
+ html.gsub(/\s*<table.*?>.*?<\/table>/mi) do |table|
49
+ markdown = "\n\n|"
50
+ table.scan(/<tr.*?>.*?<\/tr>/mi) do |row|
51
+ row.scan(/<td.*?>.*?<\/td>/mi) do |cell|
52
+ markdown << cell.gsub(/^<td.*?>/mi, '').gsub(/<\/td>$/mi, '')
53
+ markdown << "|"
54
+ end
55
+ end
56
+ markdown << "\n"
57
+ end
58
+ end
59
+
60
+ def convert_paragraphs html
61
+ if html.match(/<\/?p\s?.*?>/)
62
+ html = html.dup \
63
+ .split(/[\s\n]*\<\/{0,1}p.*?\>[\s\n]*/i)
64
+ .reject{|chunk| chunk.match /^[\s\n]*$/ }
65
+ .join("\n\n")
66
+ .chomp + "\n"
67
+ end
68
+ html
69
+ end
70
+
71
+ def convert_headings html
72
+ html.gsub(/\<h\d.*?\>.*?\<\/h\d\>/mi) { |heading|
73
+ match = heading.match /\<h(\d).*?\>(.*?)\<\/h\d\>/mi
74
+ heading_prefix = "#" * match[1].to_i
75
+ heading_text = match[2].gsub(/\n/, ' ').gsub(/^\s*/, '').gsub(/\s*$/, '')
76
+ "#{heading_prefix} #{heading_text}"
77
+ }
78
+ end
79
+
80
+ def convert_links html
81
+ html = html.dup
82
+ urls = []
83
+ html.gsub!(/\<a.*?\>.*?\<\/a\>/mi) { |link|
84
+ index = urls.length
85
+ match = link.match /\<a.*href=["']?([^\s"']+)["']?.*\>(.*?)\<\/a\>/mi
86
+ if match.nil?
87
+ raise "No match for link: #{link}"
88
+ end
89
+ link_href = match[1]
90
+ link_text = match[2]
91
+ urls[index] = link_href
92
+ "[#{link_text}][#{index + 1}]"
93
+ }
94
+ html << "\n"
95
+ html << urls.each_with_index.map do |url, i|
96
+ index = i + 1
97
+ "[#{index}]: #{url}"
98
+ end.join("\n")
99
+ html << "\n"
100
+ end
101
+
102
+ def convert_images html
103
+ html = html.dup
104
+ images = {}
105
+ html.gsub!(/<img.*?\/?>/mi) { |img|
106
+ index = images.length
107
+ src_match = img.match /<img.*?src=["']?([^\s"']+)["']?.*?\/?>/mi
108
+ return img unless src_match.present?
109
+ src = src_match[1]
110
+ alt_match = img.match /<img.*?alt=["']([^"']+)["'].*?\/?>/mi
111
+ alt = alt_match[1] if alt_match.present?
112
+ title_match = img.match /<img.*?title=["']([^"']+)["'].*?\/?>/mi
113
+ title = title_match[1] if title_match.present?
114
+ if alt.blank?
115
+ if title.present?
116
+ alt = title
117
+ else
118
+ alt = src.gsub(/^.*\//,'').gsub(/\..*$/,'')
119
+ end
120
+ elsif title.blank?
121
+ title = alt
122
+ end
123
+ title = title.present? ? " \"#{title}\"" : nil
124
+ index = "img-#{(images.size + 1).to_s.rjust(2, '0')}"
125
+ images[index] = OpenStruct.new({ url: src, title: title })
126
+ "![#{alt}][#{index}]"
127
+ }
128
+ html << "\n"
129
+ html << images.each_pair.map do |index, img|
130
+ "[#{index}]: #{img.url}#{img.title}"
131
+ end.join("\n")
132
+ html << "\n"
133
+ end
134
+ end
135
+ end
136
+
137
+ end
138
+ end
@@ -0,0 +1,37 @@
1
+ module Locomotivecms
2
+ module Freight
3
+
4
+ class Post < Squares::Base
5
+ IMPORTABLE_PROPERTIES = [
6
+ :title, :keywords, :posted_at, :body, :teaser, :source, :_visible
7
+ ]
8
+ properties :title, :keywords, :posted_at, :body, :teaser, :source,
9
+ :wp_id
10
+ property :_visible, default: true
11
+ property :_id
12
+ property :_slug
13
+ property :xml
14
+
15
+ def links
16
+ body.scan(/^\[\d+\]\:.*$/).map do |link|
17
+ link.chomp.gsub(/^\[.*?\]:\s?/, '')
18
+ end
19
+ end
20
+
21
+ def image_urls
22
+ body.scan(/^\[img-\d+\]:.*$/).map do |img_url|
23
+ img_url.chomp.gsub(/^\[img-\d+\]:\s?/, '').gsub(/\s".*$/, '')
24
+ end
25
+ end
26
+
27
+ def cms_params
28
+ IMPORTABLE_PROPERTIES.reduce({}) do |params, property|
29
+ params.tap do |p|
30
+ p[property] = self.send property
31
+ end
32
+ end
33
+ end
34
+ end
35
+
36
+ end
37
+ end
@@ -0,0 +1,93 @@
1
+ require 'nokogiri'
2
+ require 'locomotive/coal'
3
+ require 'yaml'
4
+ require 'erb'
5
+ require 'pry'
6
+ require 'locomotivecms/freight/wordpress_importer'
7
+
8
+ namespace :wp do
9
+ desc "install post and comment content_types"
10
+ task :install_content_types do
11
+ unless File.exists?('app/content_types')
12
+ raise "Are you sure this is a LocomotiveCMS Wagon project directory? I don't see 'app/content_types'\n\n"
13
+ end
14
+ @format = ENV['FORMAT'] || 'html'
15
+ template_dir = File.expand_path('../content_types', __FILE__)
16
+ %w(posts comments).each do |template_name|
17
+ template_file = "#{template_name}.yml"
18
+ template = File.read(File.join template_dir, "#{template_file}.erb")
19
+ renderer = ERB.new posts_template
20
+ File.open("app/content_types/#{template_file}", 'w') do |fh|
21
+ fh.write renderer.result(binding)
22
+ end
23
+ end
24
+ end
25
+
26
+ desc "import WordPress export xml"
27
+ task import: [:importer, :input_file] do
28
+ @importer.import @input_file, 'markdown'
29
+ @importer.rewrite_internal_urls
30
+ @importer.rewrite_images
31
+ end
32
+
33
+ desc "remove all WordPress posts from the targeted environment"
34
+ task clean: :importer do
35
+ @importer.clean!
36
+ end
37
+
38
+ desc "clean and then re-import"
39
+ task reload: [:clean, :import]
40
+
41
+ task testing: :importer do
42
+ binding.pry
43
+ end
44
+
45
+ task all_the_links: [:importer, :input_file] do
46
+ puts @importer.all_links(@input_file).join("\n")
47
+ end
48
+
49
+ task all_the_images: [:importer, :input_file] do
50
+ puts @importer.all_images(@input_file).join("\n")
51
+ end
52
+
53
+ task all_the_tables: [:importer, :input_file] do
54
+ puts @importer.all_tables(@input_file).join("\n")
55
+ end
56
+
57
+ task remove_non_visible: :importer do
58
+ @importer.remove_non_visible_posts!
59
+ end
60
+
61
+ task input_file: :importer do
62
+ @input_file = ENV['XML'] || ENV['XML_FILE'] || ENV['FILE']
63
+
64
+ unless @input_file
65
+ raise "You must provide an xml file by adding 'XML=/path/to/my.xml'"
66
+ end
67
+ end
68
+
69
+ task importer: :client do
70
+ @importer = WordpressImporter.new @client
71
+ end
72
+
73
+ task client: :config do
74
+ @client = Locomotive::Coal::Client.new(@host, { email: @email, api_key: @api_key }).scope_by(@handle)
75
+ end
76
+
77
+ task :config do
78
+ target_env = ENV['TARGET']
79
+ config = YAML.load(File.read('config/deploy.yml'))
80
+
81
+ unless target_env && config.keys.include?(target_env)
82
+ raise "you must set the target environment, one of #{config.keys.join(', ')}"
83
+ end
84
+
85
+ @host = config[target_env]['host']
86
+ @handle = config[target_env]['handle']
87
+ @email = config[target_env]['email']
88
+ @api_key = config[target_env]['api_key']
89
+ end
90
+
91
+ end # namespace :wp
92
+
93
+
@@ -0,0 +1,14 @@
1
+ module Locomotivecms
2
+ module Freight
3
+ class Tasks
4
+ include Rake::DSL if defined? Rake::DSL
5
+
6
+ def install_tasks
7
+ load File.expand_path('../tasks.rake', __FILE__)
8
+ end
9
+ end
10
+ end
11
+ end
12
+
13
+ Locomotivecms::Freight::Tasks.new.install_tasks
14
+
@@ -0,0 +1,5 @@
1
+ module Locomotivecms
2
+ module Freight
3
+ VERSION = "0.0.1"
4
+ end
5
+ end
@@ -0,0 +1,261 @@
1
+ require 'active_support/core_ext/object/blank'
2
+ require 'ostruct'
3
+ require 'squares'
4
+ require 'locomotivecms/freight/post'
5
+ require 'locomotivecms/freight/html_to_markdown'
6
+
7
+ module Locomotivecms
8
+ module Freight
9
+
10
+ class WordpressImporter
11
+ attr_reader :client
12
+
13
+ SOURCE='WordPress Importer'
14
+
15
+ def initialize(client)
16
+ @client = client
17
+ end
18
+
19
+ def all_links input_file
20
+ parse_all_posts(input_file).map do |item|
21
+ body = item.xpath('content:encoded').text
22
+ body.scan(/<a .*?<\/a>/mi).map{|link| link.strip }
23
+ end.flatten
24
+ end
25
+
26
+ def all_images input_file
27
+ parse_all_posts(input_file).map do |item|
28
+ body = item.xpath('content:encoded').text
29
+ body.scan(/<img.*?>/mi).map{|img| img.strip }
30
+ end.flatten
31
+ end
32
+
33
+ def all_tables input_file
34
+ parse_all_posts(input_file).map do |item|
35
+ body = item.xpath('content:encoded').text
36
+ body.scan(/<table.*?<\/table>/i).map{|table| table.strip }
37
+ end.flatten
38
+ end
39
+
40
+ def all_imported_posts filter={ source: SOURCE }
41
+ [].tap do |all_posts|
42
+ page = 1
43
+ while page do
44
+ posts = client.contents.posts.all(filter, page: page)
45
+ all_posts << posts
46
+ page = posts._next_page
47
+ end
48
+ end.flatten
49
+ end
50
+
51
+ def all_comments filter={}
52
+ [].tap do |all_comments|
53
+ page = 1
54
+ while page do
55
+ comments = client.contents.comments.all(filter, page: page)
56
+ all_comments << comments
57
+ page = comments._next_page
58
+ end
59
+ end.flatten
60
+ end
61
+
62
+ def import import_file, format=html
63
+ metrics = OpenStruct.new posts: 0, comments: 0
64
+ parse_all_posts(import_file).each do |item|
65
+
66
+ post = post_for item
67
+ puts "creating post [[ #{post.title} ]]"
68
+ p = client.contents.posts.create post.cms_params
69
+ post._id = p._id
70
+ post.save
71
+ metrics.posts += 1
72
+
73
+ comments = comments_for item, post
74
+ unless comments.empty?
75
+ puts " -- creating #{comments.count} comments..."
76
+ comments.each do |comment|
77
+ client.contents.comments.create comment
78
+ metrics.comments += 1
79
+ end
80
+ end
81
+ end
82
+ puts "Imported #{metrics.posts} posts and #{metrics.comments} comments."
83
+ end
84
+
85
+ def rewrite_internal_urls
86
+ return unless Post.any?
87
+ metrics = OpenStruct.new links: 0
88
+
89
+ Post.each do |post|
90
+ needs_update = false
91
+ post.links.each do |link|
92
+ if linked_post = Post[link]
93
+ metrics.links += 1
94
+ puts "[ #{post.title} ]: rewriting \"#{link}\" -> \"/posts/#{linked_post._slug}\""
95
+ post.body.gsub! /#{link}/, "/posts/#{linked_post._slug}"
96
+ post.save
97
+ needs_update = true
98
+ end
99
+ end
100
+ client.contents.posts.update(post._id, { body: post.body }) if needs_update
101
+ end
102
+
103
+ puts "Rewrote #{metrics.links} links in #{Post.count} posts."
104
+ end
105
+
106
+ def rewrite_images
107
+ return unless Post.any?
108
+ metrics = OpenStruct.new images: 0
109
+
110
+ # ensure /public/images/posts exists
111
+
112
+ Post.each do |post|
113
+ needs_update = false
114
+ default_host = post.id.gsub(/^(https?:\/\/[^\/]*).*$/, '\1')
115
+
116
+ post.image_urls.each do |image_url|
117
+ original_image_url = image_url.dup
118
+ image_url = "#{default_host}#{image_url}" if image_url.match(/^\//)
119
+ unless (File.directory?('public/images/posts'))
120
+ Dir.mkdir('public/images/posts')
121
+ end
122
+
123
+ image_file_name = image_url.gsub(/^.*\//, '')
124
+ puts <<-TEXT.strip_heredoc
125
+
126
+ image_url: #{image_url}
127
+ image_file_name: #{image_file_name}
128
+ ================================================================================
129
+ TEXT
130
+ curl = `curl #{image_url} > public/images/posts/#{image_file_name}`
131
+ puts curl
132
+ if curl.match /<url> malformed/m
133
+ raise <<-ERROR.strip_heredoc
134
+ Problem downloading image "#{image_url}"
135
+ image_file_name: #{image_file_name}
136
+ post.body:
137
+
138
+ #{post.body}
139
+
140
+ ERROR
141
+ end
142
+ metrics.images += 1
143
+
144
+ post.body.gsub! /#{original_image_url}/, "/sites/#{site_id}/theme/images/posts/#{image_file_name}"
145
+ post.save
146
+ needs_update = true
147
+ end
148
+ client.contents.posts.update(post._id, { body: post.body }) if needs_update
149
+ end
150
+
151
+ puts "Rewrote #{metrics.images} images in #{Post.count} posts."
152
+ puts "Don't forget to `bundle exec wagon push ENV -r theme_assets`"
153
+ end
154
+
155
+ def clean!
156
+ remove_posts
157
+ end
158
+
159
+ def remove_non_visible_posts!
160
+ remove_posts _visible: false
161
+ end
162
+
163
+ def destroy thing
164
+ content_type = thing.content_type_slug.to_sym
165
+ contents_action content_type, :destroy, thing._id
166
+ end
167
+
168
+ def create content_type, params
169
+ contents_action content_type.to_sym, :create, params
170
+ end
171
+
172
+ private
173
+
174
+ def contents_action content_type, action, params
175
+ client.contents.send(content_type).send(action, params)
176
+ end
177
+
178
+ def parse_xml input_file
179
+ File.open(input_file) { |fh| Nokogiri::XML(fh) }
180
+ end
181
+
182
+ def parse_all_posts import_file
183
+ parse_xml(import_file).xpath('//item').select do |item|
184
+ item.xpath('wp:post_type').text == 'post' && filtered?(item)
185
+ end
186
+ end
187
+
188
+ def filtered? item
189
+ !ENV['NAME'] || item.xpath('wp:post_name').text == ENV['NAME']
190
+ end
191
+
192
+ def remove_posts filter={ source: SOURCE }
193
+ metrics = OpenStruct.new posts: 0, comments: 0
194
+ posts = all_imported_posts filter
195
+ puts "Preparing to delete #{posts.count} posts..."
196
+ posts.each do |post|
197
+ puts "removing [[ #{post.title} ]]"
198
+ destroy post
199
+ metrics.posts += 1
200
+ post_comments = all_comments post: post._id
201
+ if post_comments.count > 0
202
+ puts " -- also removing #{post_comments.count} comments..."
203
+ post_comments.each do |comment|
204
+ destroy comment
205
+ metrics.comments += 1
206
+ end
207
+ end
208
+ end
209
+ puts "Removed #{metrics.posts} posts and #{metrics.comments} comments."
210
+ end
211
+
212
+ def post_for item
213
+ post = Post.new item.xpath('link').text,
214
+ title: resolve_post_title(item),
215
+ keywords: item.xpath('category').text,
216
+ posted_at: item.xpath('pubDate').text,
217
+ body: HtmlToMarkdown.convert_to_markdown(item.xpath('content:encoded').text),
218
+ teaser: item.xpath('excerpt:encoded').text,
219
+ wp_id: item.xpath('wp:post_id').text,
220
+ source: SOURCE
221
+ post.tap do |post|
222
+ if post.teaser.nil? || post.teaser == ""
223
+ post[:teaser] = post[:body].gsub(/\<.*?\>/, '')[0..300] + '...'
224
+ end
225
+ post.save
226
+ end
227
+ end
228
+
229
+ def comments_for item, post
230
+ item.xpath('wp:comment').map do |comment|
231
+ {
232
+ name: comment.xpath('wp:comment_author').text,
233
+ email: comment.xpath('wp:comment_author_email').text,
234
+ url: comment.xpath('wp:comment_author_url').text,
235
+ commented_at: comment.xpath('wp:comment_date').text,
236
+ content: comment.xpath('wp:comment_content').text,
237
+ post: post._id
238
+ }.tap do |comment|
239
+ if comment[:email].nil? || comment[:email] == ''
240
+ comment[:email] = "n/a (#{comment[:name]})"
241
+ end
242
+ end
243
+ end
244
+ end
245
+
246
+ def resolve_post_title item
247
+ title = item.xpath('title').text
248
+ title = item.xpath('wp:post_name').text if title.blank?
249
+ title = item.xpath('wp:post_id').text if title.blank?
250
+ title
251
+ end
252
+
253
+ def site_id
254
+ @handle ||= @client.options['handle']
255
+ @site_id ||= @client.sites.all.select{ |site| site.handle == @handle }.first._id
256
+ end
257
+
258
+ end
259
+
260
+ end
261
+ end
@@ -0,0 +1,7 @@
1
+ require "locomotivecms/freight/version"
2
+
3
+ module Locomotivecms
4
+ module Freight
5
+ # Your code goes here...
6
+ end
7
+ end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'locomotivecms/freight/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "locomotivecms-freight"
8
+ spec.version = Locomotivecms::Freight::VERSION
9
+ spec.authors = ["Joel Helbling"]
10
+ spec.email = ["joel@joelhelbling.com"]
11
+
12
+ spec.summary = %q{Loads WordPress content into LocomotiveCMS.}
13
+ spec.description = %q{Specifically posts, comments and images. Rewrites images and internal links.}
14
+ spec.homepage = "https://github.com/joelhelbling/locomotivecms-freight"
15
+ spec.license = "MIT"
16
+
17
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
18
+ # delete this section to allow pushing this gem to any host.
19
+ if spec.respond_to?(:metadata)
20
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
21
+ else
22
+ raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
23
+ end
24
+
25
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
26
+ spec.bindir = "exe"
27
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
+ spec.require_paths = ["lib"]
29
+
30
+ spec.add_dependency "nokogiri", "~> 1.6.7"
31
+ spec.add_dependency "locomotivecms_coal", "~> 1.0"
32
+ spec.add_dependency "activesupport", "~> 4.2"
33
+ spec.add_dependency "squares", "~> 0.3"
34
+ spec.add_development_dependency "bundler", "~> 1.10"
35
+ spec.add_development_dependency "rake", "~> 10.0"
36
+ spec.add_development_dependency "rspec"
37
+ spec.add_development_dependency "pry"
38
+ end
metadata ADDED
@@ -0,0 +1,178 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: locomotivecms-freight
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Joel Helbling
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-03-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.7
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: locomotivecms_coal
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: activesupport
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '4.2'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '4.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: squares
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.3'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.10'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '10.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '10.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description: Specifically posts, comments and images. Rewrites images and internal
126
+ links.
127
+ email:
128
+ - joel@joelhelbling.com
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".gitignore"
134
+ - ".rspec"
135
+ - ".travis.yml"
136
+ - Gemfile
137
+ - LICENSE.txt
138
+ - README.md
139
+ - Rakefile
140
+ - bin/console
141
+ - bin/freight
142
+ - bin/setup
143
+ - lib/locomotivecms/freight.rb
144
+ - lib/locomotivecms/freight/content_types/comments.yml.erb
145
+ - lib/locomotivecms/freight/content_types/posts.yml.erb
146
+ - lib/locomotivecms/freight/html_to_markdown.rb
147
+ - lib/locomotivecms/freight/post.rb
148
+ - lib/locomotivecms/freight/tasks.rake
149
+ - lib/locomotivecms/freight/tasks.rb
150
+ - lib/locomotivecms/freight/version.rb
151
+ - lib/locomotivecms/freight/wordpress_importer.rb
152
+ - locomotivecms-freight.gemspec
153
+ homepage: https://github.com/joelhelbling/locomotivecms-freight
154
+ licenses:
155
+ - MIT
156
+ metadata:
157
+ allowed_push_host: https://rubygems.org
158
+ post_install_message:
159
+ rdoc_options: []
160
+ require_paths:
161
+ - lib
162
+ required_ruby_version: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ required_rubygems_version: !ruby/object:Gem::Requirement
168
+ requirements:
169
+ - - ">="
170
+ - !ruby/object:Gem::Version
171
+ version: '0'
172
+ requirements: []
173
+ rubyforge_project:
174
+ rubygems_version: 2.4.6
175
+ signing_key:
176
+ specification_version: 4
177
+ summary: Loads WordPress content into LocomotiveCMS.
178
+ test_files: []