word-to-markdown 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/word-to-markdown.rb +10 -5
  3. metadata +15 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 64e2eeda272f608f1d1bfd5690abdeebdd1f31d8
4
- data.tar.gz: ad2cd8d38fe5893463097ddb41fd973e001642df
3
+ metadata.gz: 438abfb05468da472c652d87e12b80d40f714572
4
+ data.tar.gz: b43be781c4a35c7d5968bb97b8620af3cfd10bb8
5
5
  SHA512:
6
- metadata.gz: 37da10c45c34e29bd671c2d6833e81f754eaea14e07fbca693eb8b5d813db9a5a508d2bb483a0045f0d5df8de3d704db0ec8bfe25f2345c6358dd8d83eaea003
7
- data.tar.gz: cff63c72bbc8fba7e994454875c01b56d9d239fa822762e30d317519d28e3ae7821f9414be923733f4a1cb48c888dcd7fd15c0c346c07d510687f11782507f55
6
+ metadata.gz: 0fe70f87fdb8524e85316fccf6784f23e43e60f614e6037e7289583804b9a0f0fb4930eb79c60c23b67e11c3e2c57c87aa16a36a67d07e3c1aba375938b50614
7
+ data.tar.gz: e0ae583d9d9e343b9b722e236018a108e27ea454aec521607dd807a2dc63a4f96ca2a8392eec0024af1c1f723aa30c83621456c75726559df7180cacb5c3f310
data/lib/word-to-markdown.rb CHANGED
@@ -6,7 +6,7 @@ class WordToMarkdown
6
6
  HEADING_DEPTH = 6 # Number of headings to guess, e.g., h6
7
7
  HEADING_STEP = 100/HEADING_DEPTH
8
8
  MIN_HEADING_SIZE = 20
9
-
9
+
10
10
  LI_SELECTORS = %w[
11
11
  MsoListParagraphCxSpFirst
12
12
  MsoListParagraphCxSpMiddle
@@ -38,7 +38,9 @@ class WordToMarkdown
38
38
  encoding = encoding(html)
39
39
  html = html.force_encoding(encoding).encode("UTF-8", :invalid => :replace, :replace => "")
40
40
  html.gsub! /\<\/?o:[^>]+>/, "" # Strip everything in the office namespace
41
- html.gsub! /\n|\r/," " # remove linebreaks
41
+ html.gsub! /\n|\r/," " # Remove linebreaks
42
+ html.gsub! /“|”/, '"' # Straighten curly double quotes
43
+ html.gsub! /‘|’/, "'" # Straighten curly single quotes
42
44
  html
43
45
  end
44
46
 
@@ -51,7 +53,7 @@ class WordToMarkdown
51
53
  end
52
54
 
53
55
  def html
54
- @doc.to_html
56
+ doc.to_html
55
57
  end
56
58
 
57
59
  def encoding(html)
@@ -78,7 +80,7 @@ class WordToMarkdown
78
80
  def implicit_headings
79
81
  @implicit_headings ||= begin
80
82
  headings = []
81
- @doc.css("[style]").each do |element|
83
+ doc.css("[style]").each do |element|
82
84
  headings.push element unless element.font_size.nil? || element.font_size < MIN_HEADING_SIZE
83
85
  end
84
86
  headings
@@ -112,13 +114,16 @@ class WordToMarkdown
112
114
  # Try to make semantic markup explicit where implied by the export
113
115
  def semanticize!
114
116
  # Convert unnumbered list paragraphs to actual unnumbered lists
115
- @doc.css(".#{LI_SELECTORS.join(",.")}").each { |node| node.node_name = "li" }
117
+ doc.css(".#{LI_SELECTORS.join(",.")}").each { |node| node.node_name = "li" }
116
118
 
117
119
  # Try to guess heading where implicit bassed on font size
118
120
  implicit_headings.each do |element|
119
121
  heading = guess_heading element
120
122
  element.node_name = heading unless heading.nil?
121
123
  end
124
+
125
+ # Removes paragraphs from tables
126
+ doc.search("td p").each { |node| node.node_name = "span" }
122
127
  end
123
128
  end
124
129
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rerun
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
111
125
  description: Ruby Gem to convert Word documents to markdown.
112
126
  email: ben.balter@github.com
113
127
  executables: []