html2md 0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +60 -0
- data/Rakefile +4 -1
- data/features/markdown.feature +16 -1
- data/lib/html2md/VERSION.rb +1 -1
- data/lib/html2md/document.rb +34 -0
- metadata +1 -1
data/README.md
CHANGED
@@ -0,0 +1,60 @@
|
|
1
|
+
Description
|
2
|
+
===========
|
3
|
+
|
4
|
+
A basic library that converts HTML to Markdown. It is basic in that it supports only simple HTML formatting (no CSS support yet).
|
5
|
+
|
6
|
+
Examples
|
7
|
+
========
|
8
|
+
|
9
|
+
``` ruby
|
10
|
+
require 'html2md'
|
11
|
+
require 'open-uri'
|
12
|
+
|
13
|
+
html2md = Html2Md.new(open("http://www.google.com").read)
|
14
|
+
puts html2md.parse
|
15
|
+
```
|
16
|
+
|
17
|
+
``` markdown
|
18
|
+
GoogleSearch [Images](http://www.google.com/imghp?hl=en&tab=wi) [Videos](http://video.google.com/?hl=en&tab=wv) [Maps](http://maps.google.com/maps?hl=en&tab=wl) [News](http://news.google.com/nwshp?hl=en&tab=wn) [Shopping](http://www.google.com/shopping?hl=en&tab=wf) [Gmail](https://mail.google.com/mail/?tab=wm) [More »](http://www.google.com/intl/en/options/)[iGoogle](/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg) | [Web History](http://www.google.com/history/optout?hl=en) | [Settings](/preferences?hl=en) | [Sign in](https://accounts.google.com/ServiceLogin?hl=en&continue=http://www.google.com/)
|
19
|
+
|
20
|
+
|
21
|
+
<table><tr><td> </td><td>
|
22
|
+
</td><td>[Advanced search](/advanced_search?hl=en)[Language tools](/language_tools?hl=en)</td></tr></table>
|
23
|
+
[Advertising Programs](/intl/en/ads/)[Business Solutions](/services/)[+Google](https://plus.google.com/116899029375914044550)[About Google](/intl/en/about.html)© 2012 - [Privacy](/intl/en/privacy.html)
|
24
|
+
|
25
|
+
|
26
|
+
```
|
27
|
+
|
28
|
+
Build
|
29
|
+
=====
|
30
|
+
This gem is built with Travis CI: http://travis-ci.org/#!/pmorton/html2md
|
31
|
+
|
32
|
+
Compatibility
|
33
|
+
==============
|
34
|
+
Currently not compatible with JRuby, mainly because I am too lazy to fix the build issues. Compatibility with JRuby will be added in the near future.
|
35
|
+
|
36
|
+
|
37
|
+
Contributing
|
38
|
+
============
|
39
|
+
1. Fork this repository
|
40
|
+
2. Create a branch for your proposed changes
|
41
|
+
3. Add tests for your code
|
42
|
+
4. Make sure that all tests pass
|
43
|
+
5. Update Documentation!
|
44
|
+
6. Issue a pull request
|
45
|
+
|
46
|
+
License and Author
|
47
|
+
==================
|
48
|
+
Author:: Paul Morton (<geeksitk@gmail.com>)
|
49
|
+
|
50
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
51
|
+
you may not use this file except in compliance with the License.
|
52
|
+
You may obtain a copy of the License at
|
53
|
+
|
54
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
55
|
+
|
56
|
+
Unless required by applicable law or agreed to in writing, software
|
57
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
58
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
59
|
+
See the License for the specific language governing permissions and
|
60
|
+
limitations under the License.
|
data/Rakefile
CHANGED
@@ -4,13 +4,16 @@ lib = File.expand_path('../lib/', __FILE__)
|
|
4
4
|
$:.unshift lib unless $:.include?(lib)
|
5
5
|
|
6
6
|
require 'html2md'
|
7
|
+
require 'open-uri'
|
7
8
|
|
8
9
|
Cucumber::Rake::Task.new do |t|
|
9
10
|
t.cucumber_opts = %w{--format pretty}
|
10
11
|
end
|
11
12
|
|
13
|
+
task :default => [:cucumber]
|
14
|
+
|
12
15
|
desc "Test"
|
13
16
|
task :t, [] => [] do |taks,args|
|
14
|
-
t = Html2Md.new(
|
17
|
+
t = Html2Md.new(open("http://loremipsum.net/about.html").read)
|
15
18
|
puts t.parse
|
16
19
|
end
|
data/features/markdown.feature
CHANGED
@@ -79,4 +79,19 @@ Feature: Markdown
|
|
79
79
|
Scenario: Character data should not have new lines
|
80
80
|
* HTML This is character data \n
|
81
81
|
* I say parse
|
82
|
-
* The markdown should be (This is character data \n\n)
|
82
|
+
* The markdown should be (This is character data \n\n)
|
83
|
+
|
84
|
+
Scenario: First level headers
|
85
|
+
* HTML <h1>This is a H1 Element</h1>
|
86
|
+
* I say parse
|
87
|
+
* The markdown should be (\nThis is a H1 Element\n====================\n)
|
88
|
+
|
89
|
+
Scenario: Second level headers
|
90
|
+
* HTML <h2>This is a H2 Element</h2>
|
91
|
+
* I say parse
|
92
|
+
* The markdown should be (\nThis is a H2 Element\n--------------------\n)
|
93
|
+
|
94
|
+
Scenario: Third level headers
|
95
|
+
* HTML <h3>This is a H3 Element</h3>
|
96
|
+
* I say parse
|
97
|
+
* The markdown should be (\n### This is a H3 Element\n)
|
data/lib/html2md/VERSION.rb
CHANGED
data/lib/html2md/document.rb
CHANGED
@@ -13,6 +13,7 @@ class Html2Md
|
|
13
13
|
@allowed_tags = ['tr','td','th','table']
|
14
14
|
@current_list = -1
|
15
15
|
@list_tree = []
|
16
|
+
@last_cdata_length = 0
|
16
17
|
|
17
18
|
end
|
18
19
|
|
@@ -91,6 +92,38 @@ class Html2Md
|
|
91
92
|
@markdown << "\n\n"
|
92
93
|
end
|
93
94
|
|
95
|
+
def start_h1(attributes)
|
96
|
+
@markdown << "\n"
|
97
|
+
end
|
98
|
+
|
99
|
+
def end_h1(attributes)
|
100
|
+
@markdown << "\n"
|
101
|
+
@last_cdata_length.times do
|
102
|
+
@markdown << "="
|
103
|
+
end
|
104
|
+
@markdown << "\n"
|
105
|
+
end
|
106
|
+
|
107
|
+
def start_h2(attributes)
|
108
|
+
@markdown << "\n"
|
109
|
+
end
|
110
|
+
|
111
|
+
def end_h2(attributes)
|
112
|
+
@markdown << "\n"
|
113
|
+
@last_cdata_length.times do
|
114
|
+
@markdown << "-"
|
115
|
+
end
|
116
|
+
@markdown << "\n"
|
117
|
+
end
|
118
|
+
|
119
|
+
def start_h3(attributes)
|
120
|
+
@markdown << "\n### "
|
121
|
+
end
|
122
|
+
|
123
|
+
def end_h3(attributes)
|
124
|
+
@markdown << "\n"
|
125
|
+
end
|
126
|
+
|
94
127
|
def start_a(attributes)
|
95
128
|
attributes.each do | attrib |
|
96
129
|
if attrib[0].downcase.eql? 'href'
|
@@ -163,6 +196,7 @@ class Html2Md
|
|
163
196
|
end
|
164
197
|
|
165
198
|
def characters c
|
199
|
+
@last_cdata_length = c.chomp.length
|
166
200
|
if @list_tree[-1]
|
167
201
|
@markdown << c.chomp.lstrip.rstrip
|
168
202
|
else
|