hparser 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. data/.gitignore +2 -0
  2. data/.travis.yml +11 -0
  3. data/ChangeLog +4 -0
  4. data/Gemfile +3 -0
  5. data/README.md +40 -0
  6. data/Rakefile +12 -0
  7. data/VERSION +1 -0
  8. data/hparser.gemspec +21 -0
  9. data/lib/hparser/block/dl.rb +4 -4
  10. data/lib/hparser/block/footnote_list.rb +19 -0
  11. data/lib/hparser/block/head.rb +2 -2
  12. data/lib/hparser/block/list.rb +8 -8
  13. data/lib/hparser/block/p.rb +4 -3
  14. data/lib/hparser/block/pair.rb +12 -7
  15. data/lib/hparser/block/quote.rb +32 -2
  16. data/lib/hparser/block/raw.rb +34 -0
  17. data/lib/hparser/block/see_more.rb +31 -0
  18. data/lib/hparser/block/super_pre.rb +21 -3
  19. data/lib/hparser/block/table.rb +4 -4
  20. data/lib/hparser/hatena.rb +3 -1
  21. data/lib/hparser/html.rb +181 -13
  22. data/lib/hparser/inline/comment.rb +27 -0
  23. data/lib/hparser/inline/footnote.rb +34 -0
  24. data/lib/hparser/inline/fotolife.rb +40 -0
  25. data/lib/hparser/inline/hatena_id.rb +7 -6
  26. data/lib/hparser/inline/parser.rb +3 -2
  27. data/lib/hparser/inline/tex.rb +27 -0
  28. data/lib/hparser/inline/text.rb +3 -2
  29. data/lib/hparser/inline/url.rb +20 -6
  30. data/lib/hparser/latex.rb +273 -0
  31. data/lib/hparser/parser.rb +17 -1
  32. data/lib/hparser/text.rb +42 -0
  33. data/lib/hparser/util/line_scanner.rb +3 -2
  34. data/lib/hparser.rb +1 -0
  35. data/test/integration_texts/error1.ok.hatena +23 -0
  36. data/test/test_block.rb +65 -2
  37. data/test/test_bruteforce.rb +48 -0
  38. data/test/test_dl.rb +13 -1
  39. data/test/test_footnote.rb +42 -0
  40. data/test/test_fotolife.rb +29 -0
  41. data/test/test_from_perl/01_module.t +559 -0
  42. data/test/test_from_perl/02_module_extend.t +36 -0
  43. data/test/test_from_perl/10_autolink.t +78 -0
  44. data/test/test_from_perl/11_autolink_extend.t +43 -0
  45. data/test/test_hatena.rb +2 -2
  46. data/test/test_head.rb +7 -1
  47. data/test/test_helper.rb +11 -0
  48. data/test/test_html.rb +39 -3
  49. data/test/test_id.rb +1 -1
  50. data/test/test_inline.rb +13 -1
  51. data/test/test_inline_html.rb +37 -2
  52. data/test/test_integration.rb +20 -0
  53. data/test/test_latex.rb +101 -0
  54. data/test/test_p.rb +23 -3
  55. data/test/test_pair.rb +22 -4
  56. data/test/test_quote.rb +69 -0
  57. data/test/test_see_more.rb +28 -0
  58. data/test/test_table.rb +1 -1
  59. data/test/test_tex.rb +24 -0
  60. data/test/test_text.rb +12 -2
  61. data/test/test_url.rb +39 -2
  62. metadata +141 -58
  63. data/README +0 -17
data/test/test_from_perl/01_module.t ADDED
@@ -0,0 +1,559 @@
1
+ use File::Spec;
2
+ use strict;
3
+ use warnings;
4
+ use Test::Base;
5
+ use Text::Hatena;
6
+
7
+ filters {
8
+ text => ['text_hatena', 'omit_indent', 'chomp'],
9
+ line => ['text_hatena_p', 'omit_indent', 'chomp'],
10
+ html => ['omit_indent', 'chomp'],
11
+ };
12
+
13
+ sub text_hatena { Text::Hatena->parse(shift, 'body') }
14
+ sub text_hatena_p { Text::Hatena->parse(shift, 'p') }
15
+ sub omit_indent {
16
+ (my $text = shift) =~ s/^[\t\s]+//gmo;
17
+ return $text;
18
+ }
19
+
20
+ #use Carp;
21
+ #local $SIG{'__WARN__'} = \&Carp::confess;
22
+
23
+ run_is;
24
+
25
+ __END__
26
+ === h3
27
+ --- text
28
+ *Hello, World!
29
+ --- html
30
+ <div class="section">
31
+ <h3>Hello, World!</h3>
32
+ </div>
33
+
34
+ === h3_2
35
+ --- text
36
+ *Hello, World!
37
+ This is Text::Hatena.
38
+ --- html
39
+ <div class="section">
40
+ <h3>Hello, World!</h3>
41
+ <p>This is Text::Hatena.</p>
42
+ </div>
43
+
44
+ === h3_3
45
+ --- text
46
+ *Hello, World!
47
+ This is Text::Hatena.
48
+ --- html
49
+ <div class="section">
50
+ <p> *Hello, World!</p>
51
+ <p>This is Text::Hatena.</p>
52
+ </div>
53
+
54
+ === h3_4
55
+ --- text
56
+ *Good morning
57
+
58
+ It's morning.
59
+
60
+ *Good afternoon
61
+
62
+ Beautiful day!
63
+ --- html
64
+ <div class="section">
65
+ <h3>Good morning</h3>
66
+
67
+ <p>It's morning.</p>
68
+ </div>
69
+ <div class="section">
70
+ <h3>Good afternoon</h3>
71
+
72
+ <p>Beautiful day!</p>
73
+ </div>
74
+
75
+ === h4
76
+ --- text
77
+ **Hello, Japan!
78
+
79
+ This is Text::Hatena.
80
+ --- html
81
+ <div class="section">
82
+ <h4>Hello, Japan!</h4>
83
+
84
+ <p>This is Text::Hatena.</p>
85
+ </div>
86
+
87
+ === h5
88
+ --- text
89
+ ***Hello, Tokyo!
90
+
91
+ This is Text::Hatena.
92
+ --- html
93
+ <div class="section">
94
+ <h5>Hello, Tokyo!</h5>
95
+
96
+ <p>This is Text::Hatena.</p>
97
+ </div>
98
+
99
+ === blockquote
100
+ --- text
101
+ >>
102
+ quoted
103
+ <<
104
+ --- html
105
+ <div class="section">
106
+ <blockquote>
107
+ <p>quoted</p>
108
+ </blockquote>
109
+ </div>
110
+
111
+ === blockquote2
112
+ --- text
113
+ >>
114
+ quoted
115
+ >>
116
+ quoted quoted
117
+ <<
118
+ <<
119
+ --- html
120
+ <div class="section">
121
+ <blockquote>
122
+ <p>quoted</p>
123
+ <blockquote>
124
+ <p>quoted quoted</p>
125
+ </blockquote>
126
+ </blockquote>
127
+ </div>
128
+
129
+ === blockquote3
130
+ --- text
131
+ >>
132
+ unquoted
133
+ <<
134
+ --- html
135
+ <div class="section">
136
+ <p> >></p>
137
+ <p> unquoted</p>
138
+ <p> <<</p>
139
+ </div>
140
+
141
+ === blockquote4
142
+ --- text
143
+ >http://www.hatena.ne.jp/>
144
+ Hatena
145
+ <<
146
+ --- html
147
+ <div class="section">
148
+ <blockquote title="http://www.hatena.ne.jp/" cite="http://www.hatena.ne.jp/">
149
+ <p>Hatena</p>
150
+ <cite><a href="http://www.hatena.ne.jp/">http://www.hatena.ne.jp/</a></cite>
151
+ </blockquote>
152
+ </div>
153
+
154
+ === blockquote5
155
+ --- text
156
+ >http://www.hatena.ne.jp/:title=Hatena>
157
+ Hatena
158
+ <<
159
+ --- html
160
+ <div class="section">
161
+ <blockquote title="Hatena" cite="http://www.hatena.ne.jp/">
162
+ <p>Hatena</p>
163
+ <cite><a href="http://www.hatena.ne.jp/">Hatena</a></cite>
164
+ </blockquote>
165
+ </div>
166
+
167
+ === dl
168
+ --- text
169
+ :cinnamon:dog
170
+ --- html
171
+ <div class="section">
172
+ <dl>
173
+ <dt>cinnamon</dt>
174
+ <dd>dog</dd>
175
+ </dl>
176
+ </div>
177
+
178
+ === dl2
179
+ --- text
180
+ :cinnamon:dog
181
+ :tama:cat
182
+ --- html
183
+ <div class="section">
184
+ <dl>
185
+ <dt>cinnamon</dt>
186
+ <dd>dog</dd>
187
+ <dt>tama</dt>
188
+ <dd>cat</dd>
189
+ </dl>
190
+ </div>
191
+
192
+ === ul
193
+ --- text
194
+ -komono
195
+ -kyoto
196
+ -shibuya
197
+ --- html
198
+ <div class="section">
199
+ <ul>
200
+ <li>komono</li>
201
+ <li>kyoto</li>
202
+ <li>shibuya</li>
203
+ </ul>
204
+ </div>
205
+
206
+ === ul2
207
+ --- text
208
+ -komono
209
+ --kyoto
210
+ ---shibuya
211
+ --hachiyama
212
+ --- html
213
+ <div class="section">
214
+ <ul>
215
+ <li>komono
216
+ <ul>
217
+ <li>kyoto
218
+ <ul>
219
+ <li>shibuya</li>
220
+ </ul>
221
+ </li>
222
+ <li>hachiyama</li>
223
+ </ul>
224
+ </li>
225
+ </ul>
226
+ </div>
227
+
228
+ === ul3
229
+ --- text
230
+ -list
231
+ --ul
232
+ --ol
233
+ -pre
234
+ --- html
235
+ <div class="section">
236
+ <ul>
237
+ <li>list
238
+ <ul>
239
+ <li>ul</li>
240
+ <li>ol</li>
241
+ </ul>
242
+ </li>
243
+ <li>pre</li>
244
+ </ul>
245
+ </div>
246
+
247
+ === ul4
248
+ --- text
249
+ - wrong list
250
+ - what's happen?
251
+ --- html
252
+ <div class="section">
253
+ <p> - wrong list</p>
254
+ <p> - what's happen?</p>
255
+ </div>
256
+
257
+ === ul5
258
+ --- text
259
+ - right list
260
+ - wrong list
261
+ - what's happen?
262
+ --- html
263
+ <div class="section">
264
+ <ul>
265
+ <li> right list</li>
266
+ </ul>
267
+ <p> - wrong list</p>
268
+ <p> - what's happen?</p>
269
+ </div>
270
+
271
+ === ul6
272
+ --- text
273
+ -Japan
274
+ --Kyoto
275
+ --Tokyo
276
+ -USA
277
+ --Mountain View
278
+ --- html
279
+ <div class="section">
280
+ <ul>
281
+ <li>Japan
282
+ <ul>
283
+ <li>Kyoto</li>
284
+ <li>Tokyo</li>
285
+ </ul>
286
+ </li>
287
+ <li>USA
288
+ <ul>
289
+ <li>Mountain View</li>
290
+ </ul>
291
+ </li>
292
+ </ul>
293
+ </div>
294
+
295
+ === ul7
296
+ --- text
297
+ -komono
298
+ --kyoto
299
+ ---shibuya
300
+ --hachiyama
301
+ --- html
302
+ <div class="section">
303
+ <ul>
304
+ <li>komono
305
+ <ul>
306
+ <li>kyoto
307
+ <ul>
308
+ <li>shibuya</li>
309
+ </ul>
310
+ </li>
311
+ <li>hachiyama</li>
312
+ </ul>
313
+ </li>
314
+ </ul>
315
+ </div>
316
+
317
+ === ol
318
+ --- text
319
+ +Register
320
+ +Login
321
+ +Write your blog
322
+ --- html
323
+ <div class="section">
324
+ <ol>
325
+ <li>Register</li>
326
+ <li>Login</li>
327
+ <li>Write your blog</li>
328
+ </ol>
329
+ </div>
330
+
331
+ === ol2
332
+ --- text
333
+ -Steps
334
+ ++Register
335
+ ++Login
336
+ ++Write your blog
337
+ -Option
338
+ --180pt
339
+ --- html
340
+ <div class="section">
341
+ <ul>
342
+ <li>Steps
343
+ <ol>
344
+ <li>Register</li>
345
+ <li>Login</li>
346
+ <li>Write your blog</li>
347
+ </ol>
348
+ </li>
349
+ <li>Option
350
+ <ul>
351
+ <li>180pt</li>
352
+ </ul>
353
+ </li>
354
+ </ul>
355
+ </div>
356
+
357
+ === super_pre
358
+ --- text
359
+ >||
360
+ #!/usr/bin/perl
361
+
362
+ my $url = 'http://d.hatena.ne.jp/';
363
+ ||<
364
+ --- html
365
+ <div class="section">
366
+ <pre>
367
+ #!/usr/bin/perl
368
+
369
+ my $url = 'http://d.hatena.ne.jp/';
370
+ </pre>
371
+ </div>
372
+
373
+ === super_pre_fail
374
+ --- text
375
+ >||
376
+ #!/usr/bin/perl
377
+
378
+ my $name = 'jkondo'||<
379
+ --- html
380
+ <div class="section">
381
+ <p>>||</p>
382
+ <p>#!/usr/bin/perl</p>
383
+
384
+ <p>my $name = 'jkondo'||<</p>
385
+ </div>
386
+
387
+ === super_pre2
388
+ --- text
389
+ >|perl|
390
+ #!/usr/bin/perl
391
+
392
+ my $url = 'http://d.hatena.ne.jp/';
393
+ ||<
394
+ --- html
395
+ <div class="section">
396
+ <pre>
397
+ #!/usr/bin/perl
398
+
399
+ my $url = 'http://d.hatena.ne.jp/';
400
+ </pre>
401
+ </div>
402
+
403
+ === super_pre3
404
+ --- text
405
+ >||
406
+ >>
407
+ unquoted
408
+ <<
409
+ - unlisted
410
+ http://www.hatena.com/ unanchored.
411
+ ||<
412
+ --- html
413
+ <div class="section">
414
+ <pre>
415
+ >>
416
+ unquoted
417
+ <<
418
+ - unlisted
419
+ http://www.hatena.com/ unanchored.
420
+ </pre>
421
+ </div>
422
+
423
+ === super_pre4
424
+ --- text
425
+ >||
426
+ >>
427
+ unquoted
428
+ <<
429
+ - unlisted
430
+ http://www.hatena.com/ unanchored.
431
+ <a href="http://www.hatena.com/">escaped tags</a>
432
+ ||<
433
+ --- html
434
+ <div class="section">
435
+ <pre>
436
+ >>
437
+ unquoted
438
+ <<
439
+ - unlisted
440
+ http://www.hatena.com/ unanchored.
441
+ <a href="http://www.hatena.com/">escaped tags</a>
442
+ </pre>
443
+ </div>
444
+
445
+ === pre
446
+ --- text
447
+ >|
448
+ #!/usr/bin/perl
449
+ use strict;
450
+ use warnings;
451
+
452
+ say 'Hello, World!';
453
+ |<
454
+ --- html
455
+ <div class="section">
456
+ <pre>
457
+ #!/usr/bin/perl
458
+ use strict;
459
+ use warnings;
460
+
461
+ say 'Hello, World!';
462
+ </pre>
463
+ </div>
464
+
465
+ === pre2
466
+ --- text
467
+ >|
468
+ To: info@test.com
469
+ Subject: This is Test.
470
+
471
+ Hello, This is test from Text::Hatena.
472
+ Don't reply to this email.
473
+
474
+ --
475
+ jkondo
476
+ |<
477
+ --- html
478
+ <div class="section">
479
+ <pre>
480
+ To: info@test.com
481
+ Subject: This is Test.
482
+
483
+ Hello, This is test from Text::Hatena.
484
+ Don't reply to this email.
485
+
486
+ --
487
+ jkondo
488
+ </pre>
489
+ </div>
490
+
491
+ === table
492
+ --- text
493
+ |*Lang|*Module|
494
+ |Perl|Text::Hatena|
495
+ --- html
496
+ <div class="section">
497
+ <table>
498
+ <tr>
499
+ <th>Lang</th>
500
+ <th>Module</th>
501
+ </tr>
502
+ <tr>
503
+ <td>Perl</td>
504
+ <td>Text::Hatena</td>
505
+ </tr>
506
+ </table>
507
+ </div>
508
+
509
+ === cdata
510
+ --- text
511
+ ><div>no paragraph line</div><
512
+ paragraph line
513
+ --- html
514
+ <div class="section">
515
+ <div>no paragraph line</div>
516
+ <p>paragraph line</p>
517
+ </div>
518
+
519
+ === cdata2
520
+ --- text
521
+ ><blockquote>
522
+ <p>Hello I am writing HTML tags by myself</p>
523
+ </blockquote><
524
+ --- html
525
+ <div class="section">
526
+ <blockquote>
527
+ <p>Hello I am writing HTML tags by myself</p>
528
+ </blockquote>
529
+ </div>
530
+
531
+ === cdata3
532
+ --- text
533
+ ><blockquote><
534
+ Please add p tags for me.
535
+ It's candy blockquote.
536
+ ></blockquote><
537
+ --- html
538
+ <div class="section">
539
+ <blockquote>
540
+ <p>Please add p tags for me.</p>
541
+ <p>It's candy blockquote.</p>
542
+ </blockquote>
543
+ </div>
544
+
545
+ === autolink
546
+ --- text
547
+ *Hello World!
548
+
549
+ Here is Text::Hatena.
550
+ CPAN site: http://search.cpan.org/dist/Text-Hatena/
551
+ Have fun!
552
+ --- html
553
+ <div class="section">
554
+ <h3>Hello World!</h3>
555
+
556
+ <p>Here is Text::Hatena.</p>
557
+ <p>CPAN site: <a href="http://search.cpan.org/dist/Text-Hatena/">http://search.cpan.org/dist/Text-Hatena/</a></p>
558
+ <p>Have fun!</p>
559
+ </div>
data/test/test_from_perl/02_module_extend.t ADDED
@@ -0,0 +1,36 @@
1
+ use File::Spec;
2
+ use lib File::Spec->catdir('t', 'lib');
3
+ use strict;
4
+ use warnings;
5
+ use Test::Base;
6
+ use MyParser;
7
+
8
+ filters {
9
+ text => ['my_parse', 'omit_indent', 'chomp'],
10
+ html => ['omit_indent', 'chomp'],
11
+ };
12
+
13
+ sub my_parse { MyParser->parse(shift, 'body') }
14
+ sub omit_indent {
15
+ (my $text = shift) =~ s/^[\t\s]+//gmo;
16
+ return $text;
17
+ }
18
+
19
+ run_is;
20
+
21
+ __END__
22
+ === h3
23
+ --- text
24
+ *Hello, World!
25
+ --- html
26
+ <div class="section">
27
+ <h3>Hello, World!</h3>
28
+ </div>
29
+
30
+ === h3_2
31
+ --- text
32
+ *1172604381*Hello, World!
33
+ --- html
34
+ <div class="section">
35
+ <h3>Hello, World!<span class="timestamp">1172604381</span></h3>
36
+ </div>
data/test/test_from_perl/10_autolink.t ADDED
@@ -0,0 +1,78 @@
1
+ use strict;
2
+ use warnings;
3
+ use Test::Base;
4
+ use Text::Hatena::AutoLink;
5
+
6
+ filters {
7
+ text => ['text_hatena_autolink', 'chomp'],
8
+ html => ['chomp'],
9
+ };
10
+
11
+ sub text_hatena_autolink {
12
+ Text::Hatena::AutoLink->parse(shift);
13
+ }
14
+
15
+ run_is;
16
+
17
+ __END__
18
+ === http
19
+ --- text
20
+ http://www.hatena.com/
21
+ --- html
22
+ <a href="http://www.hatena.com/">http://www.hatena.com/</a>
23
+
24
+ === http2
25
+ --- text
26
+ hatena: http://www.hatena.com/
27
+ --- html
28
+ hatena: <a href="http://www.hatena.com/">http://www.hatena.com/</a>
29
+
30
+ === http3
31
+ --- text
32
+ hatena: http://www.hatena.com/
33
+ hatena(jp): http://www.hatena.ne.jp/
34
+ --- html
35
+ hatena: <a href="http://www.hatena.com/">http://www.hatena.com/</a>
36
+ hatena(jp): <a href="http://www.hatena.ne.jp/">http://www.hatena.ne.jp/</a>
37
+
38
+ === http_image
39
+ --- text
40
+ [http://www.hatena.ne.jp/images/top/h1.gif:image]
41
+ --- html
42
+ <a href="http://www.hatena.ne.jp/images/top/h1.gif"><img src="http://www.hatena.ne.jp/images/top/h1.gif" alt="http://www.hatena.ne.jp/images/top/h1.gif" /></a>
43
+
44
+ === http_image2
45
+ --- text
46
+ [http://www.hatena.ne.jp/images/top/h1.gif:image:w150]
47
+ --- html
48
+ <a href="http://www.hatena.ne.jp/images/top/h1.gif"><img src="http://www.hatena.ne.jp/images/top/h1.gif" alt="http://www.hatena.ne.jp/images/top/h1.gif" width="150" /></a>
49
+
50
+ === http_image3
51
+ --- text
52
+ [http://www.hatena.ne.jp/images/top/h1.gif:image:h100]
53
+ --- html
54
+ <a href="http://www.hatena.ne.jp/images/top/h1.gif"><img src="http://www.hatena.ne.jp/images/top/h1.gif" alt="http://www.hatena.ne.jp/images/top/h1.gif" height="100" /></a>
55
+
56
+ === http_title
57
+ --- text
58
+ This is our site. [http://www.hatena.ne.jp/:title=Hatena]
59
+ --- html
60
+ This is our site. <a href="http://www.hatena.ne.jp/">Hatena</a>
61
+
62
+ === ftp
63
+ --- text
64
+ Here are our files. ftp://www.hatena.ne.jp/
65
+ --- html
66
+ Here are our files. <a href="ftp://www.hatena.ne.jp/">ftp://www.hatena.ne.jp/</a>
67
+
68
+ === unbracket
69
+ --- text
70
+ I don't want to link to here. []http://dont.link.to.me/[].
71
+ --- html
72
+ I don't want to link to here. http://dont.link.to.me/.
73
+
74
+ === mailto
75
+ --- text
76
+ send me a mail mailto:info@example.com
77
+ --- html
78
+ send me a mail <a href="mailto:info@example.com">mailto:info@example.com</a>
data/test/test_from_perl/11_autolink_extend.t ADDED
@@ -0,0 +1,43 @@
1
+ use strict;
2
+ use warnings;
3
+ use Test::Base;
4
+ use Text::Hatena::AutoLink;
5
+
6
+ Text::Hatena::AutoLink->syntax({
7
+ 'id:([\w-]+)' => sub {
8
+ my $mvar = shift;
9
+ my $name = $mvar->[1];
10
+ return qq|<a href="/$name/">id:$name</a>|;
11
+ },
12
+ 'd:id:([\w-]+)' => sub {
13
+ my $mvar = shift;
14
+ my $name = $mvar->[1];
15
+ return qq|<a href="http://d.hatena.ne.jp/$name/">d:id:$name</a>|;
16
+ },
17
+ });
18
+
19
+ filters {
20
+ text => ['text_hatena_autolink', 'chomp'],
21
+ html => ['chomp'],
22
+ };
23
+
24
+ sub text_hatena_autolink {
25
+ Text::Hatena::AutoLink->parse(shift);
26
+ }
27
+
28
+ run_is;
29
+
30
+ __END__
31
+ === id
32
+ --- text
33
+ Hello, id:jkondo!
34
+ --- html
35
+ Hello, <a href="/jkondo/">id:jkondo</a>!
36
+
37
+ === d:id
38
+ --- text
39
+ Hello, id:jkondo!
40
+ Is this your blog? d:id:jkondo
41
+ --- html
42
+ Hello, <a href="/jkondo/">id:jkondo</a>!
43
+ Is this your blog? <a href="http://d.hatena.ne.jp/jkondo/">d:id:jkondo</a>
data/test/test_hatena.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'test/unit'
1
+ require 'test_helper'
2
2
  require 'hparser'
3
3
  require 'hparser/hatena'
4
4
 
@@ -32,7 +32,7 @@ class HatenaTest < Test::Unit::TestCase
32
32
  end
33
33
 
34
34
  def test_quote
35
- assert_hatena ">>\nfoobar\n<<",Quote.new([Text.new('foobar')])
35
+ assert_hatena ">>\nfoobar\n\n<<",Quote.new([P.new([Text.new('foobar')])])
36
36
  end
37
37
 
38
38
  def test_table