word-to-markdown 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/word-to-markdown.rb +10 -5
  3. metadata +15 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 64e2eeda272f608f1d1bfd5690abdeebdd1f31d8
4
- data.tar.gz: ad2cd8d38fe5893463097ddb41fd973e001642df
3
+ metadata.gz: 438abfb05468da472c652d87e12b80d40f714572
4
+ data.tar.gz: b43be781c4a35c7d5968bb97b8620af3cfd10bb8
5
5
  SHA512:
6
- metadata.gz: 37da10c45c34e29bd671c2d6833e81f754eaea14e07fbca693eb8b5d813db9a5a508d2bb483a0045f0d5df8de3d704db0ec8bfe25f2345c6358dd8d83eaea003
7
- data.tar.gz: cff63c72bbc8fba7e994454875c01b56d9d239fa822762e30d317519d28e3ae7821f9414be923733f4a1cb48c888dcd7fd15c0c346c07d510687f11782507f55
6
+ metadata.gz: 0fe70f87fdb8524e85316fccf6784f23e43e60f614e6037e7289583804b9a0f0fb4930eb79c60c23b67e11c3e2c57c87aa16a36a67d07e3c1aba375938b50614
7
+ data.tar.gz: e0ae583d9d9e343b9b722e236018a108e27ea454aec521607dd807a2dc63a4f96ca2a8392eec0024af1c1f723aa30c83621456c75726559df7180cacb5c3f310
data/lib/word-to-markdown.rb CHANGED
@@ -6,7 +6,7 @@ class WordToMarkdown
6
6
  HEADING_DEPTH = 6 # Number of headings to guess, e.g., h6
7
7
  HEADING_STEP = 100/HEADING_DEPTH
8
8
  MIN_HEADING_SIZE = 20
9
-
9
+
10
10
  LI_SELECTORS = %w[
11
11
  MsoListParagraphCxSpFirst
12
12
  MsoListParagraphCxSpMiddle
@@ -38,7 +38,9 @@ class WordToMarkdown
38
38
  encoding = encoding(html)
39
39
  html = html.force_encoding(encoding).encode("UTF-8", :invalid => :replace, :replace => "")
40
40
  html.gsub! /\<\/?o:[^>]+>/, "" # Strip everything in the office namespace
41
- html.gsub! /\n|\r/," " # remove linebreaks
41
+ html.gsub! /\n|\r/," " # Remove linebreaks
42
+ html.gsub! /“|”/, '"' # Straighten curly double quotes
43
+ html.gsub! /‘|’/, "'" # Straighten curly single quotes
42
44
  html
43
45
  end
44
46
 
@@ -51,7 +53,7 @@ class WordToMarkdown
51
53
  end
52
54
 
53
55
  def html
54
- @doc.to_html
56
+ doc.to_html
55
57
  end
56
58
 
57
59
  def encoding(html)
@@ -78,7 +80,7 @@ class WordToMarkdown
78
80
  def implicit_headings
79
81
  @implicit_headings ||= begin
80
82
  headings = []
81
- @doc.css("[style]").each do |element|
83
+ doc.css("[style]").each do |element|
82
84
  headings.push element unless element.font_size.nil? || element.font_size < MIN_HEADING_SIZE
83
85
  end
84
86
  headings
@@ -112,13 +114,16 @@ class WordToMarkdown
112
114
  # Try to make semantic markup explicit where implied by the export
113
115
  def semanticize!
114
116
  # Convert unnumbered list paragraphs to actual unnumbered lists
115
- @doc.css(".#{LI_SELECTORS.join(",.")}").each { |node| node.node_name = "li" }
117
+ doc.css(".#{LI_SELECTORS.join(",.")}").each { |node| node.node_name = "li" }
116
118
 
117
119
  # Try to guess heading where implicit based on font size
118
120
  implicit_headings.each do |element|
119
121
  heading = guess_heading element
120
122
  element.node_name = heading unless heading.nil?
121
123
  end
124
+
125
+ # Removes paragraphs from tables
126
+ doc.search("td p").each { |node| node.node_name = "span" }
122
127
  end
123
128
  end
124
129
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rerun
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
111
125
  description: Ruby Gem to convert Word documents to markdown.
112
126
  email: ben.balter@github.com
113
127
  executables: []