html2md 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +60 -0
- data/Rakefile +4 -1
- data/features/markdown.feature +16 -1
- data/lib/html2md/VERSION.rb +1 -1
- data/lib/html2md/document.rb +34 -0
- metadata +1 -1
data/README.md
CHANGED
@@ -0,0 +1,60 @@
|
|
1
|
+
Description
|
2
|
+
===========
|
3
|
+
|
4
|
+
A basic library that converts HTML to Markdown. It is basic in that it only supports basic HTML formatting (no CSS support yet).
|
5
|
+
|
6
|
+
Examples
|
7
|
+
========
|
8
|
+
|
9
|
+
``` ruby
|
10
|
+
require 'html2md'
|
11
|
+
require 'open-uri'
|
12
|
+
|
13
|
+
html2md = Html2Md.new(open("Http://www.google.com").read)
|
14
|
+
puts html2md.parse
|
15
|
+
```
|
16
|
+
|
17
|
+
``` markdown
|
18
|
+
GoogleSearch [Images](http://www.google.com/imghp?hl=en&tab=wi) [Videos](http://video.google.com/?hl=en&tab=wv) [Maps](http://maps.google.com/maps?hl=en&tab=wl) [News](http://news.google.com/nwshp?hl=en&tab=wn) [Shopping](http://www.google.com/shopping?hl=en&tab=wf) [Gmail](https://mail.google.com/mail/?tab=wm) [More »](http://www.google.com/intl/en/options/)[iGoogle](/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg) | [Web History](http://www.google.com/history/optout?hl=en) | [Settings](/preferences?hl=en) | [Sign in](https://accounts.google.com/ServiceLogin?hl=en&continue=http://www.google.com/)
|
19
|
+
|
20
|
+
|
21
|
+
<table><tr><td> </td><td>
|
22
|
+
</td><td>[Advanced search](/advanced_search?hl=en)[Language tools](/language_tools?hl=en)</td></tr></table>
|
23
|
+
[Advertising Programs](/intl/en/ads/)[Business Solutions](/services/)[+Google](https://plus.google.com/116899029375914044550)[About Google](/intl/en/about.html)© 2012 - [Privacy](/intl/en/privacy.html)
|
24
|
+
|
25
|
+
|
26
|
+
```
|
27
|
+
|
28
|
+
Build
|
29
|
+
=====
|
30
|
+
This gem is built with Travis-ci.org. http://travis-ci.org/#!/pmorton/html2md
|
31
|
+
|
32
|
+
Compatibility
|
33
|
+
==============
|
34
|
+
Currently not compatible with JRuby, mainly because I am too lazy to fix the build issues. Compatibility for JRuby will be added in the near future.
|
35
|
+
|
36
|
+
|
37
|
+
Contributing
|
38
|
+
============
|
39
|
+
1. Fork this repository
|
40
|
+
2. Create a branch for your proposed changes
|
41
|
+
3. Add tests for your code
|
42
|
+
4. Make sure that all tests pass
|
43
|
+
5. Update Documentation!
|
44
|
+
6. Issue a pull request
|
45
|
+
|
46
|
+
License and Author
|
47
|
+
==================
|
48
|
+
Author:: Paul Morton (<geeksitk@gmail.com>)
|
49
|
+
|
50
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
51
|
+
you may not use this file except in compliance with the License.
|
52
|
+
You may obtain a copy of the License at
|
53
|
+
|
54
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
55
|
+
|
56
|
+
Unless required by applicable law or agreed to in writing, software
|
57
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
58
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
59
|
+
See the License for the specific language governing permissions and
|
60
|
+
limitations under the License.
|
data/Rakefile
CHANGED
@@ -4,13 +4,16 @@ lib = File.expand_path('../lib/', __FILE__)
|
|
4
4
|
$:.unshift lib unless $:.include?(lib)
|
5
5
|
|
6
6
|
require 'html2md'
|
7
|
+
require 'open-uri'
|
7
8
|
|
8
9
|
Cucumber::Rake::Task.new do |t|
|
9
10
|
t.cucumber_opts = %w{--format pretty}
|
10
11
|
end
|
11
12
|
|
13
|
+
task :default => [:cucumber]
|
14
|
+
|
12
15
|
desc "Test"
|
13
16
|
task :t, [] => [] do |taks,args|
|
14
|
-
t = Html2Md.new(
|
17
|
+
t = Html2Md.new(open("http://loremipsum.net/about.html").read)
|
15
18
|
puts t.parse
|
16
19
|
end
|
data/features/markdown.feature
CHANGED
@@ -79,4 +79,19 @@ Feature: Markdown
|
|
79
79
|
Scenario: Character data should not have new lines
|
80
80
|
* HTML This is character data \n
|
81
81
|
* I say parse
|
82
|
-
* The markdown should be (This is character data \n\n)
|
82
|
+
* The markdown should be (This is character data \n\n)
|
83
|
+
|
84
|
+
Scenario: First level headers
|
85
|
+
* HTML <h1>This is a H1 Element</h1>
|
86
|
+
* I say parse
|
87
|
+
* The markdown should be (\nThis is a H1 Element\n====================\n)
|
88
|
+
|
89
|
+
Scenario: Second level headers
|
90
|
+
* HTML <h2>This is a H2 Element</h2>
|
91
|
+
* I say parse
|
92
|
+
* The markdown should be (\nThis is a H2 Element\n--------------------\n)
|
93
|
+
|
94
|
+
Scenario: Third level headers
|
95
|
+
* HTML <h3>This is a H3 Element</h3>
|
96
|
+
* I say parse
|
97
|
+
* The markdown should be (\n### This is a H3 Element\n)
|
data/lib/html2md/VERSION.rb
CHANGED
data/lib/html2md/document.rb
CHANGED
@@ -13,6 +13,7 @@ class Html2Md
|
|
13
13
|
@allowed_tags = ['tr','td','th','table']
|
14
14
|
@current_list = -1
|
15
15
|
@list_tree = []
|
16
|
+
@last_cdata_length = 0
|
16
17
|
|
17
18
|
end
|
18
19
|
|
@@ -91,6 +92,38 @@ class Html2Md
|
|
91
92
|
@markdown << "\n\n"
|
92
93
|
end
|
93
94
|
|
95
|
+
def start_h1(attributes)
|
96
|
+
@markdown << "\n"
|
97
|
+
end
|
98
|
+
|
99
|
+
def end_h1(attributes)
|
100
|
+
@markdown << "\n"
|
101
|
+
@last_cdata_length.times do
|
102
|
+
@markdown << "="
|
103
|
+
end
|
104
|
+
@markdown << "\n"
|
105
|
+
end
|
106
|
+
|
107
|
+
def start_h2(attributes)
|
108
|
+
@markdown << "\n"
|
109
|
+
end
|
110
|
+
|
111
|
+
def end_h2(attributes)
|
112
|
+
@markdown << "\n"
|
113
|
+
@last_cdata_length.times do
|
114
|
+
@markdown << "-"
|
115
|
+
end
|
116
|
+
@markdown << "\n"
|
117
|
+
end
|
118
|
+
|
119
|
+
def start_h3(attributes)
|
120
|
+
@markdown << "\n### "
|
121
|
+
end
|
122
|
+
|
123
|
+
def end_h3(attributes)
|
124
|
+
@markdown << "\n"
|
125
|
+
end
|
126
|
+
|
94
127
|
def start_a(attributes)
|
95
128
|
attributes.each do | attrib |
|
96
129
|
if attrib[0].downcase.eql? 'href'
|
@@ -163,6 +196,7 @@ class Html2Md
|
|
163
196
|
end
|
164
197
|
|
165
198
|
def characters c
|
199
|
+
@last_cdata_length = c.chomp.length
|
166
200
|
if @list_tree[-1]
|
167
201
|
@markdown << c.chomp.lstrip.rstrip
|
168
202
|
else
|