gammo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53):
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.travis.yml +6 -0
  4. data/Gemfile +9 -0
  5. data/Gemfile.lock +27 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +177 -0
  8. data/Rakefile +25 -0
  9. data/gammo.gemspec +23 -0
  10. data/lib/gammo.rb +15 -0
  11. data/lib/gammo/attribute.rb +17 -0
  12. data/lib/gammo/fragment_parser.rb +65 -0
  13. data/lib/gammo/node.rb +157 -0
  14. data/lib/gammo/parser.rb +524 -0
  15. data/lib/gammo/parser/constants.rb +94 -0
  16. data/lib/gammo/parser/foreign.rb +307 -0
  17. data/lib/gammo/parser/insertion_mode.rb +74 -0
  18. data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
  19. data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
  20. data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
  21. data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
  22. data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
  23. data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
  24. data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
  25. data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
  26. data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
  27. data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
  28. data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
  29. data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
  30. data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
  31. data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
  32. data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
  33. data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
  34. data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
  35. data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
  36. data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
  37. data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
  38. data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
  39. data/lib/gammo/parser/insertion_mode/text.rb +32 -0
  40. data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
  41. data/lib/gammo/parser/node_stack.rb +24 -0
  42. data/lib/gammo/tags.rb +9 -0
  43. data/lib/gammo/tags/table.rb +744 -0
  44. data/lib/gammo/tokenizer.rb +373 -0
  45. data/lib/gammo/tokenizer/debug.rb +34 -0
  46. data/lib/gammo/tokenizer/entity.rb +2240 -0
  47. data/lib/gammo/tokenizer/escape.rb +174 -0
  48. data/lib/gammo/tokenizer/script_scanner.rb +229 -0
  49. data/lib/gammo/tokenizer/tokens.rb +66 -0
  50. data/lib/gammo/version.rb +3 -0
  51. data/misc/html.yaml +384 -0
  52. data/misc/table.erubi +14 -0
  53. metadata +97 -0
@@ -0,0 +1,3 @@
1
+ module Gammo
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,384 @@
1
+ elements:
2
+ - a
3
+ - abbr
4
+ - address
5
+ - area
6
+ - article
7
+ - aside
8
+ - audio
9
+ - b
10
+ - base
11
+ - bdi
12
+ - bdo
13
+ - blockquote
14
+ - body
15
+ - br
16
+ - button
17
+ - canvas
18
+ - caption
19
+ - cite
20
+ - code
21
+ - col
22
+ - colgroup
23
+ - command
24
+ - data
25
+ - datalist
26
+ - dd
27
+ - del
28
+ - details
29
+ - dfn
30
+ - dialog
31
+ - div
32
+ - dl
33
+ - dt
34
+ - em
35
+ - embed
36
+ - fieldset
37
+ - figcaption
38
+ - figure
39
+ - footer
40
+ - form
41
+ - h1
42
+ - h2
43
+ - h3
44
+ - h4
45
+ - h5
46
+ - h6
47
+ - head
48
+ - header
49
+ - hgroup
50
+ - hr
51
+ - html
52
+ - i
53
+ - iframe
54
+ - img
55
+ - input
56
+ - ins
57
+ - kbd
58
+ - keygen
59
+ - label
60
+ - legend
61
+ - li
62
+ - link
63
+ - main
64
+ - map
65
+ - mark
66
+ - menu
67
+ - menuitem
68
+ - meta
69
+ - meter
70
+ - nav
71
+ - noscript
72
+ - object
73
+ - ol
74
+ - optgroup
75
+ - option
76
+ - output
77
+ - p
78
+ - param
79
+ - picture
80
+ - pre
81
+ - progress
82
+ - q
83
+ - rp
84
+ - rt
85
+ - ruby
86
+ - s
87
+ - samp
88
+ - script
89
+ - section
90
+ - select
91
+ - slot
92
+ - small
93
+ - source
94
+ - span
95
+ - strong
96
+ - style
97
+ - sub
98
+ - summary
99
+ - sup
100
+ - table
101
+ - tbody
102
+ - td
103
+ - template
104
+ - textarea
105
+ - tfoot
106
+ - th
107
+ - thead
108
+ - time
109
+ - title
110
+ - tr
111
+ - track
112
+ - u
113
+ - ul
114
+ - var
115
+ - video
116
+ - wbr
117
+ attributes:
118
+ - abbr
119
+ - accept
120
+ - accept-charset
121
+ - accesskey
122
+ - action
123
+ - allowfullscreen
124
+ - allowpaymentrequest
125
+ - allowusermedia
126
+ - alt
127
+ - as
128
+ - async
129
+ - autocomplete
130
+ - autofocus
131
+ - autoplay
132
+ - challenge
133
+ - charset
134
+ - checked
135
+ - cite
136
+ - class
137
+ - color
138
+ - cols
139
+ - colspan
140
+ - command
141
+ - content
142
+ - contenteditable
143
+ - contextmenu
144
+ - controls
145
+ - coords
146
+ - crossorigin
147
+ - data
148
+ - datetime
149
+ - default
150
+ - defer
151
+ - dir
152
+ - dirname
153
+ - disabled
154
+ - download
155
+ - draggable
156
+ - dropzone
157
+ - enctype
158
+ - for
159
+ - form
160
+ - formaction
161
+ - formenctype
162
+ - formmethod
163
+ - formnovalidate
164
+ - formtarget
165
+ - headers
166
+ - height
167
+ - hidden
168
+ - high
169
+ - href
170
+ - hreflang
171
+ - http-equiv
172
+ - icon
173
+ - id
174
+ - inputmode
175
+ - integrity
176
+ - is
177
+ - ismap
178
+ - itemid
179
+ - itemprop
180
+ - itemref
181
+ - itemscope
182
+ - itemtype
183
+ - keytype
184
+ - kind
185
+ - label
186
+ - lang
187
+ - list
188
+ - loop
189
+ - low
190
+ - manifest
191
+ - max
192
+ - maxlength
193
+ - media
194
+ - mediagroup
195
+ - method
196
+ - min
197
+ - minlength
198
+ - multiple
199
+ - muted
200
+ - name
201
+ - nomodule
202
+ - nonce
203
+ - novalidate
204
+ - open
205
+ - optimum
206
+ - pattern
207
+ - ping
208
+ - placeholder
209
+ - playsinline
210
+ - poster
211
+ - preload
212
+ - radiogroup
213
+ - readonly
214
+ - referrerpolicy
215
+ - rel
216
+ - required
217
+ - reversed
218
+ - rows
219
+ - rowspan
220
+ - sandbox
221
+ - spellcheck
222
+ - scope
223
+ - scoped
224
+ - seamless
225
+ - selected
226
+ - shape
227
+ - size
228
+ - sizes
229
+ - sortable
230
+ - sorted
231
+ - slot
232
+ - span
233
+ - spellcheck
234
+ - src
235
+ - srcdoc
236
+ - srclang
237
+ - srcset
238
+ - start
239
+ - step
240
+ - style
241
+ - tabindex
242
+ - target
243
+ - title
244
+ - translate
245
+ - type
246
+ - typemustmatch
247
+ - updateviacache
248
+ - usemap
249
+ - value
250
+ - width
251
+ - workertype
252
+ - wrap
253
+ event_handlers:
254
+ - onabort
255
+ - onautocomplete
256
+ - onautocompleteerror
257
+ - onauxclick
258
+ - onafterprint
259
+ - onbeforeprint
260
+ - onbeforeunload
261
+ - onblur
262
+ - oncancel
263
+ - oncanplay
264
+ - oncanplaythrough
265
+ - onchange
266
+ - onclick
267
+ - onclose
268
+ - oncontextmenu
269
+ - oncopy
270
+ - oncuechange
271
+ - oncut
272
+ - ondblclick
273
+ - ondrag
274
+ - ondragend
275
+ - ondragenter
276
+ - ondragexit
277
+ - ondragleave
278
+ - ondragover
279
+ - ondragstart
280
+ - ondrop
281
+ - ondurationchange
282
+ - onemptied
283
+ - onended
284
+ - onerror
285
+ - onfocus
286
+ - onhashchange
287
+ - oninput
288
+ - oninvalid
289
+ - onkeydown
290
+ - onkeypress
291
+ - onkeyup
292
+ - onlanguagechange
293
+ - onload
294
+ - onloadeddata
295
+ - onloadedmetadata
296
+ - onloadend
297
+ - onloadstart
298
+ - onmessage
299
+ - onmessageerror
300
+ - onmousedown
301
+ - onmouseenter
302
+ - onmouseleave
303
+ - onmousemove
304
+ - onmouseout
305
+ - onmouseover
306
+ - onmouseup
307
+ - onmousewheel
308
+ - onwheel
309
+ - onoffline
310
+ - ononline
311
+ - onpagehide
312
+ - onpageshow
313
+ - onpaste
314
+ - onpause
315
+ - onplay
316
+ - onplaying
317
+ - onpopstate
318
+ - onprogress
319
+ - onratechange
320
+ - onreset
321
+ - onresize
322
+ - onrejectionhandled
323
+ - onscroll
324
+ - onsecuritypolicyviolation
325
+ - onseeked
326
+ - onseeking
327
+ - onselect
328
+ - onshow
329
+ - onsort
330
+ - onstalled
331
+ - onstorage
332
+ - onsubmit
333
+ - onsuspend
334
+ - ontimeupdate
335
+ - ontoggle
336
+ - onunhandledrejection
337
+ - onunload
338
+ - onvolumechange
339
+ - onwaiting
340
+ extra:
341
+ - acronym
342
+ - align
343
+ - annotation
344
+ - annotation-xml
345
+ - applet
346
+ - basefont
347
+ - bgsound
348
+ - big
349
+ - blink
350
+ - center
351
+ - color
352
+ - desc
353
+ - face
354
+ - font
355
+ - foreignObject
356
+ - foreignobject
357
+ - frame
358
+ - frameset
359
+ - image
360
+ - isindex
361
+ - listing
362
+ - malignmark
363
+ - marquee
364
+ - math
365
+ - mglyph
366
+ - mi
367
+ - mn
368
+ - mo
369
+ - ms
370
+ - mtext
371
+ - nobr
372
+ - noembed
373
+ - noframes
374
+ - plaintext
375
+ - prompt
376
+ - public
377
+ - rb
378
+ - rtc
379
+ - spacer
380
+ - strike
381
+ - svg
382
+ - system
383
+ - tt
384
+ - xmp
@@ -0,0 +1,14 @@
1
+ # Code generated by rake generate; DO NOT EDIT.
2
+ module Gammo
3
+ module Tags
4
+ <%- @tags.each do |tag|-%>
5
+ <%= camelize(tag) %> = '<%= tag %>'.freeze
6
+ <%- end -%>
7
+
8
+ TABLE = {
9
+ <%- @tags.each do |tag|-%>
10
+ '<%= tag %>' => <%= camelize(tag.capitalize) %>,
11
+ <%- end -%>
12
+ }
13
+ end
14
+ end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gammo
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - namusyaka
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-02-11 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Gammo is an implementation of the HTML5 parsing algorithm which conforms to
14
+ the WHATWG specification with pure Ruby.
15
+ email:
16
+ - namusyaka@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".gitignore"
22
+ - ".travis.yml"
23
+ - Gemfile
24
+ - Gemfile.lock
25
+ - LICENSE.txt
26
+ - README.md
27
+ - Rakefile
28
+ - gammo.gemspec
29
+ - lib/gammo.rb
30
+ - lib/gammo/attribute.rb
31
+ - lib/gammo/fragment_parser.rb
32
+ - lib/gammo/node.rb
33
+ - lib/gammo/parser.rb
34
+ - lib/gammo/parser/constants.rb
35
+ - lib/gammo/parser/foreign.rb
36
+ - lib/gammo/parser/insertion_mode.rb
37
+ - lib/gammo/parser/insertion_mode/after_after_body.rb
38
+ - lib/gammo/parser/insertion_mode/after_after_frameset.rb
39
+ - lib/gammo/parser/insertion_mode/after_body.rb
40
+ - lib/gammo/parser/insertion_mode/after_frameset.rb
41
+ - lib/gammo/parser/insertion_mode/after_head.rb
42
+ - lib/gammo/parser/insertion_mode/before_head.rb
43
+ - lib/gammo/parser/insertion_mode/before_html.rb
44
+ - lib/gammo/parser/insertion_mode/in_body.rb
45
+ - lib/gammo/parser/insertion_mode/in_caption.rb
46
+ - lib/gammo/parser/insertion_mode/in_cell.rb
47
+ - lib/gammo/parser/insertion_mode/in_column_group.rb
48
+ - lib/gammo/parser/insertion_mode/in_frameset.rb
49
+ - lib/gammo/parser/insertion_mode/in_head.rb
50
+ - lib/gammo/parser/insertion_mode/in_head_noscript.rb
51
+ - lib/gammo/parser/insertion_mode/in_row.rb
52
+ - lib/gammo/parser/insertion_mode/in_select.rb
53
+ - lib/gammo/parser/insertion_mode/in_select_in_table.rb
54
+ - lib/gammo/parser/insertion_mode/in_table.rb
55
+ - lib/gammo/parser/insertion_mode/in_table_body.rb
56
+ - lib/gammo/parser/insertion_mode/in_template.rb
57
+ - lib/gammo/parser/insertion_mode/initial.rb
58
+ - lib/gammo/parser/insertion_mode/text.rb
59
+ - lib/gammo/parser/insertion_mode_stack.rb
60
+ - lib/gammo/parser/node_stack.rb
61
+ - lib/gammo/tags.rb
62
+ - lib/gammo/tags/table.rb
63
+ - lib/gammo/tokenizer.rb
64
+ - lib/gammo/tokenizer/debug.rb
65
+ - lib/gammo/tokenizer/entity.rb
66
+ - lib/gammo/tokenizer/escape.rb
67
+ - lib/gammo/tokenizer/script_scanner.rb
68
+ - lib/gammo/tokenizer/tokens.rb
69
+ - lib/gammo/version.rb
70
+ - misc/html.yaml
71
+ - misc/table.erubi
72
+ homepage: https://github.com/namusyaka/gammo
73
+ licenses:
74
+ - MIT
75
+ metadata:
76
+ homepage_uri: https://github.com/namusyaka/gammo
77
+ source_code_uri: https://github.com/namusyaka/gammo
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: 2.3.0
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubygems_version: 3.1.2
94
+ signing_key:
95
+ specification_version: 4
96
+ summary: An HTML parser which implements the WHATWG parsing algorithm.
97
+ test_files: []