hparser 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.gitignore +2 -0
  2. data/.travis.yml +11 -0
  3. data/ChangeLog +4 -0
  4. data/Gemfile +3 -0
  5. data/README.md +40 -0
  6. data/Rakefile +12 -0
  7. data/VERSION +1 -0
  8. data/hparser.gemspec +21 -0
  9. data/lib/hparser/block/dl.rb +4 -4
  10. data/lib/hparser/block/footnote_list.rb +19 -0
  11. data/lib/hparser/block/head.rb +2 -2
  12. data/lib/hparser/block/list.rb +8 -8
  13. data/lib/hparser/block/p.rb +4 -3
  14. data/lib/hparser/block/pair.rb +12 -7
  15. data/lib/hparser/block/quote.rb +32 -2
  16. data/lib/hparser/block/raw.rb +34 -0
  17. data/lib/hparser/block/see_more.rb +31 -0
  18. data/lib/hparser/block/super_pre.rb +21 -3
  19. data/lib/hparser/block/table.rb +4 -4
  20. data/lib/hparser/hatena.rb +3 -1
  21. data/lib/hparser/html.rb +181 -13
  22. data/lib/hparser/inline/comment.rb +27 -0
  23. data/lib/hparser/inline/footnote.rb +34 -0
  24. data/lib/hparser/inline/fotolife.rb +40 -0
  25. data/lib/hparser/inline/hatena_id.rb +7 -6
  26. data/lib/hparser/inline/parser.rb +3 -2
  27. data/lib/hparser/inline/tex.rb +27 -0
  28. data/lib/hparser/inline/text.rb +3 -2
  29. data/lib/hparser/inline/url.rb +20 -6
  30. data/lib/hparser/latex.rb +273 -0
  31. data/lib/hparser/parser.rb +17 -1
  32. data/lib/hparser/text.rb +42 -0
  33. data/lib/hparser/util/line_scanner.rb +3 -2
  34. data/lib/hparser.rb +1 -0
  35. data/test/integration_texts/error1.ok.hatena +23 -0
  36. data/test/test_block.rb +65 -2
  37. data/test/test_bruteforce.rb +48 -0
  38. data/test/test_dl.rb +13 -1
  39. data/test/test_footnote.rb +42 -0
  40. data/test/test_fotolife.rb +29 -0
  41. data/test/test_from_perl/01_module.t +559 -0
  42. data/test/test_from_perl/02_module_extend.t +36 -0
  43. data/test/test_from_perl/10_autolink.t +78 -0
  44. data/test/test_from_perl/11_autolink_extend.t +43 -0
  45. data/test/test_hatena.rb +2 -2
  46. data/test/test_head.rb +7 -1
  47. data/test/test_helper.rb +11 -0
  48. data/test/test_html.rb +39 -3
  49. data/test/test_id.rb +1 -1
  50. data/test/test_inline.rb +13 -1
  51. data/test/test_inline_html.rb +37 -2
  52. data/test/test_integration.rb +20 -0
  53. data/test/test_latex.rb +101 -0
  54. data/test/test_p.rb +23 -3
  55. data/test/test_pair.rb +22 -4
  56. data/test/test_quote.rb +69 -0
  57. data/test/test_see_more.rb +28 -0
  58. data/test/test_table.rb +1 -1
  59. data/test/test_tex.rb +24 -0
  60. data/test/test_text.rb +12 -2
  61. data/test/test_url.rb +39 -2
  62. metadata +141 -58
  63. data/README +0 -17
@@ -0,0 +1,559 @@
1
+ use File::Spec;
2
+ use strict;
3
+ use warnings;
4
+ use Test::Base;
5
+ use Text::Hatena;
6
+
7
+ filters {
8
+ text => ['text_hatena', 'omit_indent', 'chomp'],
9
+ line => ['text_hatena_p', 'omit_indent', 'chomp'],
10
+ html => ['omit_indent', 'chomp'],
11
+ };
12
+
13
+ sub text_hatena { Text::Hatena->parse(shift, 'body') }
14
+ sub text_hatena_p { Text::Hatena->parse(shift, 'p') }
15
+ sub omit_indent {
16
+ (my $text = shift) =~ s/^[\t\s]+//gmo;
17
+ return $text;
18
+ }
19
+
20
+ #use Carp;
21
+ #local $SIG{'__WARN__'} = \&Carp::confess;
22
+
23
+ run_is;
24
+
25
+ __END__
26
+ === h3
27
+ --- text
28
+ *Hello, World!
29
+ --- html
30
+ <div class="section">
31
+ <h3>Hello, World!</h3>
32
+ </div>
33
+
34
+ === h3_2
35
+ --- text
36
+ *Hello, World!
37
+ This is Text::Hatena.
38
+ --- html
39
+ <div class="section">
40
+ <h3>Hello, World!</h3>
41
+ <p>This is Text::Hatena.</p>
42
+ </div>
43
+
44
+ === h3_3
45
+ --- text
46
+ *Hello, World!
47
+ This is Text::Hatena.
48
+ --- html
49
+ <div class="section">
50
+ <p> *Hello, World!</p>
51
+ <p>This is Text::Hatena.</p>
52
+ </div>
53
+
54
+ === h3_4
55
+ --- text
56
+ *Good morning
57
+
58
+ It's morning.
59
+
60
+ *Good afternoon
61
+
62
+ Beautiful day!
63
+ --- html
64
+ <div class="section">
65
+ <h3>Good morning</h3>
66
+
67
+ <p>It's morning.</p>
68
+ </div>
69
+ <div class="section">
70
+ <h3>Good afternoon</h3>
71
+
72
+ <p>Beautiful day!</p>
73
+ </div>
74
+
75
+ === h4
76
+ --- text
77
+ **Hello, Japan!
78
+
79
+ This is Text::Hatena.
80
+ --- html
81
+ <div class="section">
82
+ <h4>Hello, Japan!</h4>
83
+
84
+ <p>This is Text::Hatena.</p>
85
+ </div>
86
+
87
+ === h5
88
+ --- text
89
+ ***Hello, Tokyo!
90
+
91
+ This is Text::Hatena.
92
+ --- html
93
+ <div class="section">
94
+ <h5>Hello, Tokyo!</h5>
95
+
96
+ <p>This is Text::Hatena.</p>
97
+ </div>
98
+
99
+ === blockquote
100
+ --- text
101
+ >>
102
+ quoted
103
+ <<
104
+ --- html
105
+ <div class="section">
106
+ <blockquote>
107
+ <p>quoted</p>
108
+ </blockquote>
109
+ </div>
110
+
111
+ === blockquote2
112
+ --- text
113
+ >>
114
+ quoted
115
+ >>
116
+ quoted quoted
117
+ <<
118
+ <<
119
+ --- html
120
+ <div class="section">
121
+ <blockquote>
122
+ <p>quoted</p>
123
+ <blockquote>
124
+ <p>quoted quoted</p>
125
+ </blockquote>
126
+ </blockquote>
127
+ </div>
128
+
129
+ === blockquote3
130
+ --- text
131
+ >>
132
+ unquoted
133
+ <<
134
+ --- html
135
+ <div class="section">
136
+ <p> >></p>
137
+ <p> unquoted</p>
138
+ <p> <<</p>
139
+ </div>
140
+
141
+ === blockquote4
142
+ --- text
143
+ >http://www.hatena.ne.jp/>
144
+ Hatena
145
+ <<
146
+ --- html
147
+ <div class="section">
148
+ <blockquote title="http://www.hatena.ne.jp/" cite="http://www.hatena.ne.jp/">
149
+ <p>Hatena</p>
150
+ <cite><a href="http://www.hatena.ne.jp/">http://www.hatena.ne.jp/</a></cite>
151
+ </blockquote>
152
+ </div>
153
+
154
+ === blockquote5
155
+ --- text
156
+ >http://www.hatena.ne.jp/:title=Hatena>
157
+ Hatena
158
+ <<
159
+ --- html
160
+ <div class="section">
161
+ <blockquote title="Hatena" cite="http://www.hatena.ne.jp/">
162
+ <p>Hatena</p>
163
+ <cite><a href="http://www.hatena.ne.jp/">Hatena</a></cite>
164
+ </blockquote>
165
+ </div>
166
+
167
+ === dl
168
+ --- text
169
+ :cinnamon:dog
170
+ --- html
171
+ <div class="section">
172
+ <dl>
173
+ <dt>cinnamon</dt>
174
+ <dd>dog</dd>
175
+ </dl>
176
+ </div>
177
+
178
+ === dl2
179
+ --- text
180
+ :cinnamon:dog
181
+ :tama:cat
182
+ --- html
183
+ <div class="section">
184
+ <dl>
185
+ <dt>cinnamon</dt>
186
+ <dd>dog</dd>
187
+ <dt>tama</dt>
188
+ <dd>cat</dd>
189
+ </dl>
190
+ </div>
191
+
192
+ === ul
193
+ --- text
194
+ -komono
195
+ -kyoto
196
+ -shibuya
197
+ --- html
198
+ <div class="section">
199
+ <ul>
200
+ <li>komono</li>
201
+ <li>kyoto</li>
202
+ <li>shibuya</li>
203
+ </ul>
204
+ </div>
205
+
206
+ === ul2
207
+ --- text
208
+ -komono
209
+ --kyoto
210
+ ---shibuya
211
+ --hachiyama
212
+ --- html
213
+ <div class="section">
214
+ <ul>
215
+ <li>komono
216
+ <ul>
217
+ <li>kyoto
218
+ <ul>
219
+ <li>shibuya</li>
220
+ </ul>
221
+ </li>
222
+ <li>hachiyama</li>
223
+ </ul>
224
+ </li>
225
+ </ul>
226
+ </div>
227
+
228
+ === ul3
229
+ --- text
230
+ -list
231
+ --ul
232
+ --ol
233
+ -pre
234
+ --- html
235
+ <div class="section">
236
+ <ul>
237
+ <li>list
238
+ <ul>
239
+ <li>ul</li>
240
+ <li>ol</li>
241
+ </ul>
242
+ </li>
243
+ <li>pre</li>
244
+ </ul>
245
+ </div>
246
+
247
+ === ul4
248
+ --- text
249
+ - wrong list
250
+ - what's happen?
251
+ --- html
252
+ <div class="section">
253
+ <p> - wrong list</p>
254
+ <p> - what's happen?</p>
255
+ </div>
256
+
257
+ === ul5
258
+ --- text
259
+ - right list
260
+ - wrong list
261
+ - what's happen?
262
+ --- html
263
+ <div class="section">
264
+ <ul>
265
+ <li> right list</li>
266
+ </ul>
267
+ <p> - wrong list</p>
268
+ <p> - what's happen?</p>
269
+ </div>
270
+
271
+ === ul6
272
+ --- text
273
+ -Japan
274
+ --Kyoto
275
+ --Tokyo
276
+ -USA
277
+ --Mountain View
278
+ --- html
279
+ <div class="section">
280
+ <ul>
281
+ <li>Japan
282
+ <ul>
283
+ <li>Kyoto</li>
284
+ <li>Tokyo</li>
285
+ </ul>
286
+ </li>
287
+ <li>USA
288
+ <ul>
289
+ <li>Mountain View</li>
290
+ </ul>
291
+ </li>
292
+ </ul>
293
+ </div>
294
+
295
+ === ul7
296
+ --- text
297
+ -komono
298
+ --kyoto
299
+ ---shibuya
300
+ --hachiyama
301
+ --- html
302
+ <div class="section">
303
+ <ul>
304
+ <li>komono
305
+ <ul>
306
+ <li>kyoto
307
+ <ul>
308
+ <li>shibuya</li>
309
+ </ul>
310
+ </li>
311
+ <li>hachiyama</li>
312
+ </ul>
313
+ </li>
314
+ </ul>
315
+ </div>
316
+
317
+ === ol
318
+ --- text
319
+ +Register
320
+ +Login
321
+ +Write your blog
322
+ --- html
323
+ <div class="section">
324
+ <ol>
325
+ <li>Register</li>
326
+ <li>Login</li>
327
+ <li>Write your blog</li>
328
+ </ol>
329
+ </div>
330
+
331
+ === ol2
332
+ --- text
333
+ -Steps
334
+ ++Register
335
+ ++Login
336
+ ++Write your blog
337
+ -Option
338
+ --180pt
339
+ --- html
340
+ <div class="section">
341
+ <ul>
342
+ <li>Steps
343
+ <ol>
344
+ <li>Register</li>
345
+ <li>Login</li>
346
+ <li>Write your blog</li>
347
+ </ol>
348
+ </li>
349
+ <li>Option
350
+ <ul>
351
+ <li>180pt</li>
352
+ </ul>
353
+ </li>
354
+ </ul>
355
+ </div>
356
+
357
+ === super_pre
358
+ --- text
359
+ >||
360
+ #!/usr/bin/perl
361
+
362
+ my $url = 'http://d.hatena.ne.jp/';
363
+ ||<
364
+ --- html
365
+ <div class="section">
366
+ <pre>
367
+ #!/usr/bin/perl
368
+
369
+ my $url = 'http://d.hatena.ne.jp/';
370
+ </pre>
371
+ </div>
372
+
373
+ === super_pre_fail
374
+ --- text
375
+ >||
376
+ #!/usr/bin/perl
377
+
378
+ my $name = 'jkondo'||<
379
+ --- html
380
+ <div class="section">
381
+ <p>>||</p>
382
+ <p>#!/usr/bin/perl</p>
383
+
384
+ <p>my $name = 'jkondo'||<</p>
385
+ </div>
386
+
387
+ === super_pre2
388
+ --- text
389
+ >|perl|
390
+ #!/usr/bin/perl
391
+
392
+ my $url = 'http://d.hatena.ne.jp/';
393
+ ||<
394
+ --- html
395
+ <div class="section">
396
+ <pre>
397
+ #!/usr/bin/perl
398
+
399
+ my $url = 'http://d.hatena.ne.jp/';
400
+ </pre>
401
+ </div>
402
+
403
+ === super_pre3
404
+ --- text
405
+ >||
406
+ >>
407
+ unquoted
408
+ <<
409
+ - unlisted
410
+ http://www.hatena.com/ unanchored.
411
+ ||<
412
+ --- html
413
+ <div class="section">
414
+ <pre>
415
+ >>
416
+ unquoted
417
+ <<
418
+ - unlisted
419
+ http://www.hatena.com/ unanchored.
420
+ </pre>
421
+ </div>
422
+
423
+ === super_pre4
424
+ --- text
425
+ >||
426
+ >>
427
+ unquoted
428
+ <<
429
+ - unlisted
430
+ http://www.hatena.com/ unanchored.
431
+ <a href="http://www.hatena.com/">escaped tags</a>
432
+ ||<
433
+ --- html
434
+ <div class="section">
435
+ <pre>
436
+ >>
437
+ unquoted
438
+ <<
439
+ - unlisted
440
+ http://www.hatena.com/ unanchored.
441
+ <a href="http://www.hatena.com/">escaped tags</a>
442
+ </pre>
443
+ </div>
444
+
445
+ === pre
446
+ --- text
447
+ >|
448
+ #!/usr/bin/perl
449
+ use strict;
450
+ use warnings;
451
+
452
+ say 'Hello, World!';
453
+ |<
454
+ --- html
455
+ <div class="section">
456
+ <pre>
457
+ #!/usr/bin/perl
458
+ use strict;
459
+ use warnings;
460
+
461
+ say 'Hello, World!';
462
+ </pre>
463
+ </div>
464
+
465
+ === pre2
466
+ --- text
467
+ >|
468
+ To: info@test.com
469
+ Subject: This is Test.
470
+
471
+ Hello, This is test from Text::Hatena.
472
+ Don't reply to this email.
473
+
474
+ --
475
+ jkondo
476
+ |<
477
+ --- html
478
+ <div class="section">
479
+ <pre>
480
+ To: info@test.com
481
+ Subject: This is Test.
482
+
483
+ Hello, This is test from Text::Hatena.
484
+ Don't reply to this email.
485
+
486
+ --
487
+ jkondo
488
+ </pre>
489
+ </div>
490
+
491
+ === table
492
+ --- text
493
+ |*Lang|*Module|
494
+ |Perl|Text::Hatena|
495
+ --- html
496
+ <div class="section">
497
+ <table>
498
+ <tr>
499
+ <th>Lang</th>
500
+ <th>Module</th>
501
+ </tr>
502
+ <tr>
503
+ <td>Perl</td>
504
+ <td>Text::Hatena</td>
505
+ </tr>
506
+ </table>
507
+ </div>
508
+
509
+ === cdata
510
+ --- text
511
+ ><div>no paragraph line</div><
512
+ paragraph line
513
+ --- html
514
+ <div class="section">
515
+ <div>no paragraph line</div>
516
+ <p>paragraph line</p>
517
+ </div>
518
+
519
+ === cdata2
520
+ --- text
521
+ ><blockquote>
522
+ <p>Hello I am writing HTML tags by myself</p>
523
+ </blockquote><
524
+ --- html
525
+ <div class="section">
526
+ <blockquote>
527
+ <p>Hello I am writing HTML tags by myself</p>
528
+ </blockquote>
529
+ </div>
530
+
531
+ === cdata3
532
+ --- text
533
+ ><blockquote><
534
+ Please add p tags for me.
535
+ It's candy blockquote.
536
+ ></blockquote><
537
+ --- html
538
+ <div class="section">
539
+ <blockquote>
540
+ <p>Please add p tags for me.</p>
541
+ <p>It's candy blockquote.</p>
542
+ </blockquote>
543
+ </div>
544
+
545
+ === autolink
546
+ --- text
547
+ *Hello World!
548
+
549
+ Here is Text::Hatena.
550
+ CPAN site: http://search.cpan.org/dist/Text-Hatena/
551
+ Have fun!
552
+ --- html
553
+ <div class="section">
554
+ <h3>Hello World!</h3>
555
+
556
+ <p>Here is Text::Hatena.</p>
557
+ <p>CPAN site: <a href="http://search.cpan.org/dist/Text-Hatena/">http://search.cpan.org/dist/Text-Hatena/</a></p>
558
+ <p>Have fun!</p>
559
+ </div>
@@ -0,0 +1,36 @@
1
+ use File::Spec;
2
+ use lib File::Spec->catdir('t', 'lib');
3
+ use strict;
4
+ use warnings;
5
+ use Test::Base;
6
+ use MyParser;
7
+
8
+ filters {
9
+ text => ['my_parse', 'omit_indent', 'chomp'],
10
+ html => ['omit_indent', 'chomp'],
11
+ };
12
+
13
+ sub my_parse { MyParser->parse(shift, 'body') }
14
+ sub omit_indent {
15
+ (my $text = shift) =~ s/^[\t\s]+//gmo;
16
+ return $text;
17
+ }
18
+
19
+ run_is;
20
+
21
+ __END__
22
+ === h3
23
+ --- text
24
+ *Hello, World!
25
+ --- html
26
+ <div class="section">
27
+ <h3>Hello, World!</h3>
28
+ </div>
29
+
30
+ === h3_2
31
+ --- text
32
+ *1172604381*Hello, World!
33
+ --- html
34
+ <div class="section">
35
+ <h3>Hello, World!<span class="timestamp">1172604381</span></h3>
36
+ </div>
@@ -0,0 +1,78 @@
1
+ use strict;
2
+ use warnings;
3
+ use Test::Base;
4
+ use Text::Hatena::AutoLink;
5
+
6
+ filters {
7
+ text => ['text_hatena_autolink', 'chomp'],
8
+ html => ['chomp'],
9
+ };
10
+
11
+ sub text_hatena_autolink {
12
+ Text::Hatena::AutoLink->parse(shift);
13
+ }
14
+
15
+ run_is;
16
+
17
+ __END__
18
+ === http
19
+ --- text
20
+ http://www.hatena.com/
21
+ --- html
22
+ <a href="http://www.hatena.com/">http://www.hatena.com/</a>
23
+
24
+ === http2
25
+ --- text
26
+ hatena: http://www.hatena.com/
27
+ --- html
28
+ hatena: <a href="http://www.hatena.com/">http://www.hatena.com/</a>
29
+
30
+ === http3
31
+ --- text
32
+ hatena: http://www.hatena.com/
33
+ hatena(jp): http://www.hatena.ne.jp/
34
+ --- html
35
+ hatena: <a href="http://www.hatena.com/">http://www.hatena.com/</a>
36
+ hatena(jp): <a href="http://www.hatena.ne.jp/">http://www.hatena.ne.jp/</a>
37
+
38
+ === http_image
39
+ --- text
40
+ [http://www.hatena.ne.jp/images/top/h1.gif:image]
41
+ --- html
42
+ <a href="http://www.hatena.ne.jp/images/top/h1.gif"><img src="http://www.hatena.ne.jp/images/top/h1.gif" alt="http://www.hatena.ne.jp/images/top/h1.gif" /></a>
43
+
44
+ === http_image2
45
+ --- text
46
+ [http://www.hatena.ne.jp/images/top/h1.gif:image:w150]
47
+ --- html
48
+ <a href="http://www.hatena.ne.jp/images/top/h1.gif"><img src="http://www.hatena.ne.jp/images/top/h1.gif" alt="http://www.hatena.ne.jp/images/top/h1.gif" width="150" /></a>
49
+
50
+ === http_image3
51
+ --- text
52
+ [http://www.hatena.ne.jp/images/top/h1.gif:image:h100]
53
+ --- html
54
+ <a href="http://www.hatena.ne.jp/images/top/h1.gif"><img src="http://www.hatena.ne.jp/images/top/h1.gif" alt="http://www.hatena.ne.jp/images/top/h1.gif" height="100" /></a>
55
+
56
+ === http_title
57
+ --- text
58
+ This is our site. [http://www.hatena.ne.jp/:title=Hatena]
59
+ --- html
60
+ This is our site. <a href="http://www.hatena.ne.jp/">Hatena</a>
61
+
62
+ === ftp
63
+ --- text
64
+ Here are our files. ftp://www.hatena.ne.jp/
65
+ --- html
66
+ Here are our files. <a href="ftp://www.hatena.ne.jp/">ftp://www.hatena.ne.jp/</a>
67
+
68
+ === unbracket
69
+ --- text
70
+ I don't want to link to here. []http://dont.link.to.me/[].
71
+ --- html
72
+ I don't want to link to here. http://dont.link.to.me/.
73
+
74
+ === mailto
75
+ --- text
76
+ send me a mail mailto:info@example.com
77
+ --- html
78
+ send me a mail <a href="mailto:info@example.com">mailto:info@example.com</a>
@@ -0,0 +1,43 @@
1
+ use strict;
2
+ use warnings;
3
+ use Test::Base;
4
+ use Text::Hatena::AutoLink;
5
+
6
+ Text::Hatena::AutoLink->syntax({
7
+ 'id:([\w-]+)' => sub {
8
+ my $mvar = shift;
9
+ my $name = $mvar->[1];
10
+ return qq|<a href="/$name/">id:$name</a>|;
11
+ },
12
+ 'd:id:([\w-]+)' => sub {
13
+ my $mvar = shift;
14
+ my $name = $mvar->[1];
15
+ return qq|<a href="http://d.hatena.ne.jp/$name/">d:id:$name</a>|;
16
+ },
17
+ });
18
+
19
+ filters {
20
+ text => ['text_hatena_autolink', 'chomp'],
21
+ html => ['chomp'],
22
+ };
23
+
24
+ sub text_hatena_autolink {
25
+ Text::Hatena::AutoLink->parse(shift);
26
+ }
27
+
28
+ run_is;
29
+
30
+ __END__
31
+ === id
32
+ --- text
33
+ Hello, id:jkondo!
34
+ --- html
35
+ Hello, <a href="/jkondo/">id:jkondo</a>!
36
+
37
+ === d:id
38
+ --- text
39
+ Hello, id:jkondo!
40
+ Is this your blog? d:id:jkondo
41
+ --- html
42
+ Hello, <a href="/jkondo/">id:jkondo</a>!
43
+ Is this your blog? <a href="http://d.hatena.ne.jp/jkondo/">d:id:jkondo</a>
data/test/test_hatena.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'test/unit'
1
+ require 'test_helper'
2
2
  require 'hparser'
3
3
  require 'hparser/hatena'
4
4
 
@@ -32,7 +32,7 @@ class HatenaTest < Test::Unit::TestCase
32
32
  end
33
33
 
34
34
  def test_quote
35
- assert_hatena ">>\nfoobar\n<<",Quote.new([Text.new('foobar')])
35
+ assert_hatena ">>\nfoobar\n\n<<",Quote.new([P.new([Text.new('foobar')])])
36
36
  end
37
37
 
38
38
  def test_table