webr 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/README.md +4 -0
  2. data/Rakefile +19 -0
  3. data/app/webr.rb +57 -0
  4. data/bin/webr +6 -0
  5. data/ext/jasmine/lib/jasmine.js +2423 -0
  6. data/ext/jsdom/lib/jsdom.js +70 -0
  7. data/ext/jsdom/lib/jsdom/browser/domtohtml.js +198 -0
  8. data/ext/jsdom/lib/jsdom/browser/htmlencoding.js +381 -0
  9. data/ext/jsdom/lib/jsdom/browser/htmltodom.js +151 -0
  10. data/ext/jsdom/lib/jsdom/browser/index.js +484 -0
  11. data/ext/jsdom/lib/jsdom/level1/core.js +1610 -0
  12. data/ext/jsdom/lib/jsdom/level2/core.js +406 -0
  13. data/ext/jsdom/lib/jsdom/level2/events.js +358 -0
  14. data/ext/jsdom/lib/jsdom/level2/html.js +1424 -0
  15. data/ext/jsdom/lib/jsdom/level2/index.js +7 -0
  16. data/ext/jsdom/lib/jsdom/level2/languages/javascript.js +17 -0
  17. data/ext/jsdom/lib/jsdom/level3/core.js +514 -0
  18. data/ext/jsdom/lib/jsdom/level3/events.js +296 -0
  19. data/ext/jsdom/lib/jsdom/level3/html.js +5 -0
  20. data/ext/jsdom/lib/jsdom/level3/index.js +7 -0
  21. data/ext/node-htmlparser/lib/node-htmlparser.js +769 -0
  22. data/ext/node-htmlparser/lib/node-htmlparser.min.js +22 -0
  23. data/ext/request/request.js +116 -0
  24. data/js/jasmine-start.js +10 -0
  25. data/js/webr.js +97 -0
  26. data/jspec/jasmine_spec.js +23 -0
  27. data/lib/webr.rb +17 -0
  28. data/lib/webr/browser.rb +44 -0
  29. data/lib/webr/jasmine.rb +6 -0
  30. data/lib/webr/jasmine/browser.rb +15 -0
  31. data/lib/webr/jasmine/reporter.rb +16 -0
  32. data/lib/webr/jasmine/reporter/base.rb +40 -0
  33. data/lib/webr/jasmine/reporter/console.rb +79 -0
  34. data/lib/webr/jasmine/reporter/html.rb +179 -0
  35. data/lib/webr/portal.rb +19 -0
  36. data/lib/webr/runtime.rb +23 -0
  37. data/lib/webr/version.rb +3 -0
  38. data/spec/data/plain.html +13 -0
  39. data/spec/data/script-embedded.html +17 -0
  40. data/spec/data/script-external-onload.html +11 -0
  41. data/spec/data/script-external-onload.js +11 -0
  42. data/spec/data/script-external.html +11 -0
  43. data/spec/data/script-external.js +1 -0
  44. data/spec/data/script-jquery-1.4.2.html +12 -0
  45. data/spec/data/script-jquery-1.4.3.html +12 -0
  46. data/spec/data/script-jquery.js +3 -0
  47. data/spec/lib/webr/browser_spec.rb +133 -0
  48. data/spec/lib/webr/jasmine/browser_spec.rb +22 -0
  49. data/spec/lib/webr/jasmine/reporter/html_spec.rb +15 -0
  50. data/spec/spec_helper.rb +4 -0
  51. data/tasks/spec.rake +16 -0
  52. data/webr.gemspec +30 -0
  53. metadata +207 -0
@@ -0,0 +1,296 @@
1
+ var events = require("../level2/events").dom.level2.events;
2
+
3
+ /*
4
+
5
+ // File: events.idl
6
+
7
+ #ifndef _EVENTS_IDL_
8
+ #define _EVENTS_IDL_
9
+
10
+ #include "dom.idl"
11
+ #include "views.idl"
12
+
13
+ #pragma prefix "dom.w3c.org"
14
+ module events
15
+ {
16
+
17
+ typedef dom::DOMString DOMString;
18
+ typedef dom::DOMTimeStamp DOMTimeStamp;
19
+ typedef dom::DOMObject DOMObject;
20
+ typedef dom::Node Node;
21
+
22
+ interface EventTarget;
23
+ interface EventListener;
24
+
25
+ // Introduced in DOM Level 2:
26
+ exception EventException {
27
+ unsigned short code;
28
+ };
29
+ // EventExceptionCode
30
+ const unsigned short UNSPECIFIED_EVENT_TYPE_ERR = 0;
31
+ // Introduced in DOM Level 3:
32
+ const unsigned short DISPATCH_REQUEST_ERR = 1;
33
+
34
+
35
+ // Introduced in DOM Level 2:
36
+ interface Event {
37
+
38
+ // PhaseType
39
+ const unsigned short CAPTURING_PHASE = 1;
40
+ const unsigned short AT_TARGET = 2;
41
+ const unsigned short BUBBLING_PHASE = 3;
42
+
43
+ readonly attribute DOMString type;
44
+ readonly attribute EventTarget target;
45
+ readonly attribute EventTarget currentTarget;
46
+ readonly attribute unsigned short eventPhase;
47
+ readonly attribute boolean bubbles;
48
+ readonly attribute boolean cancelable;
49
+ readonly attribute DOMTimeStamp timeStamp;
50
+ void stopPropagation();
51
+ void preventDefault();
52
+ void initEvent(in DOMString eventTypeArg,
53
+ in boolean canBubbleArg,
54
+ in boolean cancelableArg);
55
+ // Introduced in DOM Level 3:
56
+ readonly attribute DOMString namespaceURI;
57
+ // Introduced in DOM Level 3:
58
+ boolean isCustom();
59
+ // Introduced in DOM Level 3:
60
+ void stopImmediatePropagation();
61
+ // Introduced in DOM Level 3:
62
+ boolean isDefaultPrevented();
63
+ // Introduced in DOM Level 3:
64
+ void initEventNS(in DOMString namespaceURIArg,
65
+ in DOMString eventTypeArg,
66
+ in boolean canBubbleArg,
67
+ in boolean cancelableArg);
68
+ };
69
+
70
+ // Introduced in DOM Level 2:
71
+ interface EventTarget {
72
+ void addEventListener(in DOMString type,
73
+ in EventListener listener,
74
+ in boolean useCapture);
75
+ void removeEventListener(in DOMString type,
76
+ in EventListener listener,
77
+ in boolean useCapture);
78
+ // Modified in DOM Level 3:
79
+ boolean dispatchEvent(in Event evt)
80
+ raises(EventException);
81
+ // Introduced in DOM Level 3:
82
+ void addEventListenerNS(in DOMString namespaceURI,
83
+ in DOMString type,
84
+ in EventListener listener,
85
+ in boolean useCapture,
86
+ in DOMObject evtGroup);
87
+ // Introduced in DOM Level 3:
88
+ void removeEventListenerNS(in DOMString namespaceURI,
89
+ in DOMString type,
90
+ in EventListener listener,
91
+ in boolean useCapture);
92
+ // Introduced in DOM Level 3:
93
+ boolean willTriggerNS(in DOMString namespaceURI,
94
+ in DOMString type);
95
+ // Introduced in DOM Level 3:
96
+ boolean hasEventListenerNS(in DOMString namespaceURI,
97
+ in DOMString type);
98
+ };
99
+
100
+ // Introduced in DOM Level 2:
101
+ interface EventListener {
102
+ void handleEvent(in Event evt);
103
+ };
104
+
105
+ // Introduced in DOM Level 2:
106
+ interface DocumentEvent {
107
+ Event createEvent(in DOMString eventType)
108
+ raises(dom::DOMException);
109
+ // Introduced in DOM Level 3:
110
+ boolean canDispatch(in DOMString namespaceURI,
111
+ in DOMString type);
112
+ };
113
+
114
+ // Introduced in DOM Level 3:
115
+ interface CustomEvent : Event {
116
+ void setDispatchState(in EventTarget target,
117
+ in unsigned short phase);
118
+ boolean isPropagationStopped();
119
+ boolean isImmediatePropagationStopped();
120
+ };
121
+
122
+ // Introduced in DOM Level 2:
123
+ interface UIEvent : Event {
124
+ readonly attribute views::AbstractView view;
125
+ readonly attribute long detail;
126
+ void initUIEvent(in DOMString typeArg,
127
+ in boolean canBubbleArg,
128
+ in boolean cancelableArg,
129
+ in views::AbstractView viewArg,
130
+ in long detailArg);
131
+ // Introduced in DOM Level 3:
132
+ void initUIEventNS(in DOMString namespaceURI,
133
+ in DOMString typeArg,
134
+ in boolean canBubbleArg,
135
+ in boolean cancelableArg,
136
+ in views::AbstractView viewArg,
137
+ in long detailArg);
138
+ };
139
+
140
+ // Introduced in DOM Level 3:
141
+ interface TextEvent : UIEvent {
142
+ readonly attribute DOMString data;
143
+ void initTextEvent(in DOMString typeArg,
144
+ in boolean canBubbleArg,
145
+ in boolean cancelableArg,
146
+ in views::AbstractView viewArg,
147
+ in DOMString dataArg);
148
+ void initTextEventNS(in DOMString namespaceURI,
149
+ in DOMString type,
150
+ in boolean canBubbleArg,
151
+ in boolean cancelableArg,
152
+ in views::AbstractView viewArg,
153
+ in DOMString dataArg);
154
+ };
155
+
156
+ // Introduced in DOM Level 2:
157
+ interface MouseEvent : UIEvent {
158
+ readonly attribute long screenX;
159
+ readonly attribute long screenY;
160
+ readonly attribute long clientX;
161
+ readonly attribute long clientY;
162
+ readonly attribute boolean ctrlKey;
163
+ readonly attribute boolean shiftKey;
164
+ readonly attribute boolean altKey;
165
+ readonly attribute boolean metaKey;
166
+ readonly attribute unsigned short button;
167
+ readonly attribute EventTarget relatedTarget;
168
+ void initMouseEvent(in DOMString typeArg,
169
+ in boolean canBubbleArg,
170
+ in boolean cancelableArg,
171
+ in views::AbstractView viewArg,
172
+ in long detailArg,
173
+ in long screenXArg,
174
+ in long screenYArg,
175
+ in long clientXArg,
176
+ in long clientYArg,
177
+ in boolean ctrlKeyArg,
178
+ in boolean altKeyArg,
179
+ in boolean shiftKeyArg,
180
+ in boolean metaKeyArg,
181
+ in unsigned short buttonArg,
182
+ in EventTarget relatedTargetArg);
183
+ // Introduced in DOM Level 3:
184
+ boolean getModifierState(in DOMString keyIdentifierArg);
185
+ // Introduced in DOM Level 3:
186
+ void initMouseEventNS(in DOMString namespaceURI,
187
+ in DOMString typeArg,
188
+ in boolean canBubbleArg,
189
+ in boolean cancelableArg,
190
+ in views::AbstractView viewArg,
191
+ in long detailArg,
192
+ in long screenXArg,
193
+ in long screenYArg,
194
+ in long clientXArg,
195
+ in long clientYArg,
196
+ in unsigned short buttonArg,
197
+ in EventTarget relatedTargetArg,
198
+ in DOMString modifiersList);
199
+ };
200
+
201
+ // Introduced in DOM Level 3:
202
+ interface KeyboardEvent : UIEvent {
203
+
204
+ // KeyLocationCode
205
+ const unsigned long DOM_KEY_LOCATION_STANDARD = 0x00;
206
+ const unsigned long DOM_KEY_LOCATION_LEFT = 0x01;
207
+ const unsigned long DOM_KEY_LOCATION_RIGHT = 0x02;
208
+ const unsigned long DOM_KEY_LOCATION_NUMPAD = 0x03;
209
+
210
+ readonly attribute DOMString keyIdentifier;
211
+ readonly attribute unsigned long keyLocation;
212
+ readonly attribute boolean ctrlKey;
213
+ readonly attribute boolean shiftKey;
214
+ readonly attribute boolean altKey;
215
+ readonly attribute boolean metaKey;
216
+ boolean getModifierState(in DOMString keyIdentifierArg);
217
+ void initKeyboardEvent(in DOMString typeArg,
218
+ in boolean canBubbleArg,
219
+ in boolean cancelableArg,
220
+ in views::AbstractView viewArg,
221
+ in DOMString keyIdentifierArg,
222
+ in unsigned long keyLocationArg,
223
+ in DOMString modifiersList);
224
+ void initKeyboardEventNS(in DOMString namespaceURI,
225
+ in DOMString typeArg,
226
+ in boolean canBubbleArg,
227
+ in boolean cancelableArg,
228
+ in views::AbstractView viewArg,
229
+ in DOMString keyIdentifierArg,
230
+ in unsigned long keyLocationArg,
231
+ in DOMString modifiersList);
232
+ };
233
+
234
+ // Introduced in DOM Level 2:
235
+ interface MutationEvent : Event {
236
+
237
+ // attrChangeType
238
+ const unsigned short MODIFICATION = 1;
239
+ const unsigned short ADDITION = 2;
240
+ const unsigned short REMOVAL = 3;
241
+
242
+ readonly attribute Node relatedNode;
243
+ readonly attribute DOMString prevValue;
244
+ readonly attribute DOMString newValue;
245
+ readonly attribute DOMString attrName;
246
+ readonly attribute unsigned short attrChange;
247
+ void initMutationEvent(in DOMString typeArg,
248
+ in boolean canBubbleArg,
249
+ in boolean cancelableArg,
250
+ in Node relatedNodeArg,
251
+ in DOMString prevValueArg,
252
+ in DOMString newValueArg,
253
+ in DOMString attrNameArg,
254
+ in unsigned short attrChangeArg);
255
+ // Introduced in DOM Level 3:
256
+ void initMutationEventNS(in DOMString namespaceURI,
257
+ in DOMString typeArg,
258
+ in boolean canBubbleArg,
259
+ in boolean cancelableArg,
260
+ in Node relatedNodeArg,
261
+ in DOMString prevValueArg,
262
+ in DOMString newValueArg,
263
+ in DOMString attrNameArg,
264
+ in unsigned short attrChangeArg);
265
+ };
266
+
267
+ // Introduced in DOM Level 3:
268
+ interface MutationNameEvent : MutationEvent {
269
+ readonly attribute DOMString prevNamespaceURI;
270
+ readonly attribute DOMString prevNodeName;
271
+ // Introduced in DOM Level 3:
272
+ void initMutationNameEvent(in DOMString typeArg,
273
+ in boolean canBubbleArg,
274
+ in boolean cancelableArg,
275
+ in Node relatedNodeArg,
276
+ in DOMString prevNamespaceURIArg,
277
+ in DOMString prevNodeNameArg);
278
+ // Introduced in DOM Level 3:
279
+ void initMutationNameEventNS(in DOMString namespaceURI,
280
+ in DOMString typeArg,
281
+ in boolean canBubbleArg,
282
+ in boolean cancelableArg,
283
+ in Node relatedNodeArg,
284
+ in DOMString prevNamespaceURIArg,
285
+ in DOMString prevNodeNameArg);
286
+ };
287
+ };
288
+
289
+ #endif // _EVENTS_IDL_
290
+ */
291
+
292
// DOM Level 3 events: this module publishes the `events` object loaded at the
// top of the file (the Level 2 events implementation) under dom.level3.events.
var level3Events = { events: events };
exports.dom = { level3: level3Events };
@@ -0,0 +1,5 @@
1
// DOM Level 3 HTML: re-export the Level 2 HTML implementation under
// dom.level3.html.
var level2Html = require("../level2/html").dom.level2.html;
exports.dom = { level3: { html: level2Html } };
@@ -0,0 +1,7 @@
1
// Aggregate the Level 3 sub-modules (core, events, html) of this directory
// under a single dom.level3 namespace. Modules are loaded in that order.
var level3 = {};
level3.core = require("./core").dom.level3.core;
level3.events = require("./events").dom.level3.events;
level3.html = require("./html").dom.level3.html;
exports.dom = { level3: level3 };
@@ -0,0 +1,769 @@
1
+ /***********************************************
2
+ Copyright 2010, Chris Winberry <chris@winberry.net>. All rights reserved.
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to
5
+ deal in the Software without restriction, including without limitation the
6
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ sell copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ IN THE SOFTWARE.
20
+ ***********************************************/
21
+ /* v1.6.3 */
22
+
23
+ (function () {
24
+
25
/**
 * Detects a CommonJS/Node-style module environment by checking that the
 * module-scope names `require`, `exports`, `module`, `__filename` and
 * `__dirname` all exist with the expected types.
 *
 * @returns {boolean} true only when every check passes.
 */
function runningInNode () {
    if (typeof require !== "function") return false;
    if (typeof exports !== "object") return false;
    if (typeof module !== "object") return false;
    if (typeof __filename !== "string") return false;
    return typeof __dirname === "string";
}
38
+
39
+ if (!runningInNode()) {
40
+ if (!this.Tautologistics)
41
+ this.Tautologistics = {};
42
+ else if (this.Tautologistics.NodeHtmlParser)
43
+ return; //NodeHtmlParser already defined!
44
+ this.Tautologistics.NodeHtmlParser = {};
45
+ exports = this.Tautologistics.NodeHtmlParser;
46
+ }
47
+
48
/**
 * Types of elements found in the DOM. The string values are what ends up in
 * the `type` property of every parsed element.
 */
var ElementType = {
    // Plain text
    Text: "text",
    // Special tag <!...>
    Directive: "directive",
    // Special tag <!--...-->
    Comment: "comment",
    // Special tag <script>...</script>
    Script: "script",
    // Special tag <style>...</style>
    Style: "style",
    // Any tag that isn't special
    Tag: "tag"
};
57
+
58
/**
 * Creates a parser that pushes parsed elements to `handler`.
 *
 * @param {Object} handler - checked by validateHandler() before it is stored.
 * @throws {Error} whatever validateHandler() throws for an unusable handler.
 */
function Parser (handler) {
    // Validate first so an unusable handler is never stored on the instance.
    this.validateHandler(handler);

    // reset() calls this._handler.reset(), so the handler must be set before it.
    this._handler = handler;
    this.reset();
}
63
+
64
+ //**"Static"**//
65
//Regular expressions used for cleaning up and parsing (stateless)

//Trim leading/trailing whitespace
Parser._reTrim = /(^\s+|\s+$)/g;

//Remove comment tag markup from comment contents
Parser._reTrimComment = /(^\!--|--$)/g;

//Used to find any whitespace to split on
Parser._reWhitespace = /\s/g;

//Used to find the tag name for an element
Parser._reTagName = /^\s*(\/?)\s*([^\s\/]+)/;

//Regular expressions used for parsing (stateful: callers rely on lastIndex)

//Find attributes in a tag: name="value", name='value', name=value or bare name
Parser._reAttrib =
    /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;

//Find tag markers
Parser._reTags = /[\<\>]/g;
75
+
76
+ //**Public**//
77
+ //Methods//
78
/**
 * Parses a complete HTML document in one call: resets the parser, feeds it
 * `data` as a single chunk, then signals the end of input.
 *
 * @param {string} data - the whole document.
 */
Parser.prototype.parseComplete = function Parser$parseComplete (data) {
    this.reset();          // start from a blank state
    this.parseChunk(data); // the whole document is one chunk
    this.done();           // flush whatever is left and notify the handler
};
84
+
85
/**
 * Parses a piece of an HTML document: appends `data` to the internal buffer
 * and parses as many tags as the buffer now allows.
 *
 * If done() was already called, the problem is reported through
 * handleError(); when that call returns, the chunk is still buffered and parsed.
 *
 * @param {string} data - next chunk of the document.
 */
Parser.prototype.parseChunk = function Parser$parseChunk (data) {
    if (this._done) {
        var lateChunkError = new Error("Attempted to parse chunk after parsing already done");
        this.handleError(lateChunkError);
    }
    this._buffer = this._buffer + data; //FIXME: this can be a bottleneck
    this.parseTags();
};
92
+
93
/**
 * Tells the parser that the HTML being parsed is complete.
 *
 * Any unparsed data left in the buffer becomes one final element, typed with
 * the current parse state. All pending elements are then written to the
 * handler and the handler's done() is called. Calling this a second time is
 * a no-op.
 */
Parser.prototype.done = function Parser$done () {
    if (this._done) {
        return;
    }
    this._done = true;

    //Push any unparsed text into a final element in the element list
    if (this._buffer.length) {
        var state = this._parseState;
        var leftover = this._buffer;
        this._buffer = "";

        var element = { raw: leftover, data: leftover, type: state };
        if (state != ElementType.Text) {
            //Only text keeps its surrounding whitespace
            element.data = leftover.replace(Parser._reTrim, "");
        }

        var hasName = state == ElementType.Tag
            || state == ElementType.Script
            || state == ElementType.Style;
        if (hasName) {
            element.name = this.parseTagName(element.data);
        }

        //parseAttribs ignores element types that cannot carry attributes
        this.parseAttribs(element);
        this._elements.push(element);
    }

    this.writeHandler();
    this._handler.done();
};
117
+
118
/**
 * Resets the parser to a blank state, ready to parse a new HTML document,
 * and resets the handler as well.
 */
Parser.prototype.reset = function Parser$reset () {
    //Input state
    this._buffer = "";
    this._done = false;

    //Position tracking
    this._current = 0;
    this._next = 0;
    this._prevTagSep = '';

    //Parsed output and parse context
    this._elements = [];
    this._elementsCurrent = 0;
    this._parseState = ElementType.Text;
    this._tagStack = [];

    this._handler.reset();
};
131
+
132
+ //**Private**//
133
+ //Properties//
134
//Handler for parsed elements
Parser.prototype._handler = null;

//Buffer of unparsed data
Parser.prototype._buffer = null;

//Flag indicating whether parsing is done
Parser.prototype._done = false;

//Array of parsed elements
Parser.prototype._elements = null;

//Pointer to last element in _elements that has been processed
Parser.prototype._elementsCurrent = 0;

//Position in data that has already been parsed
Parser.prototype._current = 0;

//Position in data of the next tag marker (<>)
Parser.prototype._next = 0;

//Current type of element being parsed
Parser.prototype._parseState = ElementType.Text;

//Previous tag marker found
Parser.prototype._prevTagSep = '';

//Stack of element types previously encountered; keeps track of when
//parsing occurs inside a script/comment/style tag
Parser.prototype._tagStack = null;
146
+
147
+ //Methods//
148
/**
 * Takes an array of elements and parses the attributes of every tag-like
 * element (tag, script or style) via parseAttribs().
 *
 * @param {Array} elements - parsed elements; each is mutated in place.
 * @returns {Array} the same `elements` array.
 */
Parser.prototype.parseTagAttribs = function Parser$parseTagAttribs (elements) {
    var idxEnd = elements.length;

    for (var idx = 0; idx < idxEnd; idx++) {
        var element = elements[idx];
        //The last comparison used to read ElementType.style (lowercase), which
        //is undefined, so style elements never had their attributes parsed.
        if (element.type == ElementType.Tag || element.type == ElementType.Script || element.type == ElementType.Style)
            this.parseAttribs(element);
    }

    return(elements);
};
161
+
162
/**
 * Takes an element and adds an "attribs" property holding any attributes
 * found in its `data`. Elements that are not a tag, script or style, and
 * elements with nothing after the tag name, are left untouched.
 *
 * Recognised forms: name="value", name='value', name=value and a bare name,
 * which is stored with its own name as the value.
 *
 * @param {Object} element - parsed element with `type` and `data`; mutated in place.
 */
Parser.prototype.parseAttribs = function Parser$parseAttribs (element) {
    //Only parse attributes for tags
    var type = element.type;
    var isTagLike = type == ElementType.Script || type == ElementType.Style || type == ElementType.Tag;
    if (!isTagLike) {
        return;
    }

    //Everything after the first whitespace-delimited token is attribute text
    var tagName = element.data.split(Parser._reWhitespace, 1)[0];
    var attribRaw = element.data.substring(tagName.length);
    if (attribRaw.length < 1) {
        return;
    }

    //_reAttrib is a shared global regex, so always restart it from the beginning
    var reAttrib = Parser._reAttrib;
    reAttrib.lastIndex = 0;

    for (var match = reAttrib.exec(attribRaw); match; match = reAttrib.exec(attribRaw)) {
        if (element.attribs == undefined) {
            element.attribs = {};
        }

        if (typeof match[1] == "string" && match[1].length) {
            //name="value"
            element.attribs[match[1]] = match[2];
        } else if (typeof match[3] == "string" && match[3].length) {
            //name='value'
            element.attribs[match[3]] = match[4];
        } else if (typeof match[5] == "string" && match[5].length) {
            //name=value
            element.attribs[match[5]] = match[6];
        } else if (typeof match[7] == "string" && match[7].length) {
            //bare name
            element.attribs[match[7]] = match[7];
        }
    }
};
190
+
191
/**
 * Extracts the base tag name from the data value of an element.
 *
 * @param {string} data - tag contents without the surrounding < and >.
 * @returns {string} the tag name, prefixed with "/" for a closing tag, or ""
 *     when `data` is empty/null or contains no name.
 */
Parser.prototype.parseTagName = function Parser$parseTagName (data) {
    var parts = (data == null || data == "") ? null : Parser._reTagName.exec(data);
    if (!parts) {
        return("");
    }
    var closingPrefix = parts[1] ? "/" : "";
    return(closingPrefix + parts[2]);
};
200
+
201
/**
 * Parses through the buffered HTML text and appends the elements found to
 * this._elements, then hands them to writeHandler().
 *
 * The buffer is scanned for tag markers ("<" and ">"). The text between two
 * consecutive markers becomes one element whose type is the current parse
 * state (text after ">", tag after "<"). While the tag stack is non-empty the
 * parser is inside a script, style or comment section, and markers found
 * there are folded back into the surrounding text/comment instead of
 * producing tags. Data after the last marker stays in the buffer for the next
 * chunk.
 *
 * I admit, this function is rather large but splitting up had a noticeable
 * impact on speed.
 */
Parser.prototype.parseTags = function Parser$parseTags () {
    var bufferEnd = this._buffer.length - 1;
    var rawLen;

    //_reTags is a global regex: each successful test() advances lastIndex to
    //just past the next marker; the failing test() that ends the loop resets it.
    while (Parser._reTags.test(this._buffer)) {
        this._next = Parser._reTags.lastIndex - 1;
        var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
        var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse

        //A new element to eventually be appended to the element list
        var element = {
            raw: rawData
            , data: (this._parseState == ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
            , type: this._parseState
        };

        var elementName = this.parseTagName(element.data);

        //This section inspects the current tag stack and modifies the current
        //element if we're actually parsing a special area (script/comment/style tag)
        if (this._tagStack.length) {
            var stackTop = this._tagStack[this._tagStack.length - 1];
            //Most recently queued element, if any
            var prevElement = this._elements.length ? this._elements[this._elements.length - 1] : null;

            if (stackTop == ElementType.Script) { //We're currently in a script tag
                if (elementName == "/script") { //Actually, we're no longer in a script tag, so pop it off the stack
                    this._tagStack.pop();
                }
                else if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
                    //All data from here to script close is now a text element
                    element.type = ElementType.Text;
                    //If the previous element is text, append the current text to it
                    if (prevElement && prevElement.type == ElementType.Text) {
                        prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
                        element.raw = element.data = ""; //This causes the current element to not be added to the element list
                    }
                }
            }
            else if (stackTop == ElementType.Style) { //We're currently in a style tag
                if (elementName == "/style") { //Actually, we're no longer in a style tag, so pop it off the stack
                    this._tagStack.pop();
                }
                else if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
                    //All data from here to style close is now a text element
                    element.type = ElementType.Text;
                    if (prevElement && prevElement.type == ElementType.Text) {
                        //Append the last tag marker plus the current text (possibly
                        //empty) to the previous text element. The empty case used to
                        //dereference a variable that was never assigned on that path,
                        //e.g. for "<style>a<>b</style>".
                        prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
                        element.raw = element.data = ""; //This causes the current element to not be added to the element list
                    }
                    else if (element.raw != "") { //The previous element was not text
                        element.data = element.raw; //Keep the text untrimmed
                    }
                }
            }
            else if (stackTop == ElementType.Comment) { //We're currently in a comment tag
                rawLen = element.raw.length;
                var prevIsComment = prevElement && prevElement.type == ElementType.Comment;
                if (element.raw.charAt(rawLen - 2) == "-" && element.raw.charAt(rawLen - 1) == "-" && tagSep == ">") {
                    //Actually, we're no longer in a comment tag, so pop it off the stack
                    this._tagStack.pop();
                    //If the previous element is a comment, append the current text to it
                    if (prevIsComment) {
                        prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(Parser._reTrimComment, "");
                        element.raw = element.data = ""; //This causes the current element to not be added to the element list
                        element.type = ElementType.Text;
                    }
                    else //Previous element not a comment
                        element.type = ElementType.Comment; //Change the current element's type to a comment
                }
                else { //Still in a comment tag
                    element.type = ElementType.Comment;
                    //If the previous element is a comment, append the current text to it
                    if (prevIsComment) {
                        prevElement.raw = prevElement.data = prevElement.raw + element.raw + tagSep;
                        element.raw = element.data = ""; //This causes the current element to not be added to the element list
                        element.type = ElementType.Text;
                    }
                    else
                        element.raw = element.data = element.raw + tagSep;
                }
            }
        }

        //Processing of non-special tags
        if (element.type == ElementType.Tag) {
            element.name = elementName;

            if (element.raw.indexOf("!--") == 0) { //This tag is really comment
                element.type = ElementType.Comment;
                delete element["name"];
                rawLen = element.raw.length;
                //Check if the comment is terminated in the current element
                if (element.raw.charAt(rawLen - 1) == "-" && element.raw.charAt(rawLen - 2) == "-" && tagSep == ">")
                    element.raw = element.data = element.raw.replace(Parser._reTrimComment, "");
                else { //It's not so push the comment onto the tag stack
                    element.raw += tagSep;
                    this._tagStack.push(ElementType.Comment);
                }
            }
            else if (element.raw.indexOf("!") == 0 || element.raw.indexOf("?") == 0) {
                element.type = ElementType.Directive;
                //TODO: what about CDATA?
            }
            else if (element.name == "script") {
                element.type = ElementType.Script;
                //Special tag, push onto the tag stack if not terminated
                if (element.data.charAt(element.data.length - 1) != "/")
                    this._tagStack.push(ElementType.Script);
            }
            else if (element.name == "/script")
                element.type = ElementType.Script;
            else if (element.name == "style") {
                element.type = ElementType.Style;
                //Special tag, push onto the tag stack if not terminated
                if (element.data.charAt(element.data.length - 1) != "/")
                    this._tagStack.push(ElementType.Style);
            }
            else if (element.name == "/style")
                element.type = ElementType.Style;

            //Closing tags carry nothing but their name
            if (element.name && element.name.charAt(0) == "/")
                element.data = element.name;
        }

        //Add all tags and non-empty text elements to the element list
        if (element.raw != "" || element.type != ElementType.Text) {
            this.parseAttribs(element);
            this._elements.push(element);
            //If tag self-terminates, add an explicit, separate closing tag
            if (
                element.type != ElementType.Text
                &&
                element.type != ElementType.Comment
                &&
                element.type != ElementType.Directive
                &&
                element.data.charAt(element.data.length - 1) == "/"
                )
                this._elements.push({
                    raw: "/" + element.name
                    , data: "/" + element.name
                    , name: "/" + element.name
                    , type: element.type
                    });
        }

        //The marker just consumed decides how the following data is interpreted
        this._parseState = (tagSep == "<") ? ElementType.Tag : ElementType.Text;
        this._current = this._next + 1;
        this._prevTagSep = tagSep;
    }

    //Keep only the data after the last marker for the next chunk
    this._buffer = (this._current <= bufferEnd) ? this._buffer.substring(this._current) : "";
    this._current = 0;

    this.writeHandler();
};
362
+
363
/**
 * Checks that the handler is an object with the right "interface": the
 * methods reset, done, writeTag, writeText, writeComment and writeDirective.
 *
 * @param {Object} handler - candidate handler.
 * @throws {Error} "Handler is not an object" for non-objects (including null),
 *     or "Handler method '<name>' is invalid" for the first missing method.
 */
Parser.prototype.validateHandler = function Parser$validateHandler (handler) {
    //typeof null is "object", so null has to be rejected explicitly; otherwise
    //the method checks below fail with an unrelated TypeError.
    if (handler === null || (typeof handler) != "object")
        throw new Error("Handler is not an object");

    var requiredMethods = ["reset", "done", "writeTag", "writeText", "writeComment", "writeDirective"];
    for (var i = 0; i < requiredMethods.length; i++) {
        var methodName = requiredMethods[i];
        if ((typeof handler[methodName]) != "function")
            throw new Error("Handler method '" + methodName + "' is invalid");
    }
};
380
+
381
/**
 * Writes parsed elements out to the handler, in order, removing them from
 * the pending list as it goes.
 *
 * While the tag stack is non-empty (parsing is inside a script/comment/style
 * section) nothing is written unless `forceFlush` is truthy.
 *
 * @param {boolean} [forceFlush] - write even when inside a special section.
 */
Parser.prototype.writeHandler = function Parser$writeHandler (forceFlush) {
    var insideSpecialTag = this._tagStack.length > 0;
    if (insideSpecialTag && !forceFlush) {
        return;
    }

    while (this._elements.length) {
        var element = this._elements.shift();
        var type = element.type;

        if (type == ElementType.Comment) {
            this._handler.writeComment(element);
        } else if (type == ElementType.Directive) {
            this._handler.writeDirective(element);
        } else if (type == ElementType.Text) {
            this._handler.writeText(element);
        } else {
            //tag, script and style all go through writeTag
            this._handler.writeTag(element);
        }
    }
};
404
+
405
/**
 * Reports a parsing error: passes it to the handler's error() method when the
 * handler has one, otherwise throws it.
 *
 * @param {Error} error - the error to report.
 * @throws the given `error` when the handler has no error() method.
 */
Parser.prototype.handleError = function Parser$handleError (error) {
    var handler = this._handler;
    if ((typeof handler.error) != "function") {
        throw error;
    }
    handler.error(error);
};
411
+
412
//TODO: make this a truly streamable handler
/**
 * Handler for RSS/Atom feeds. Delegates construction to the super
 * constructor with a fixed set of options.
 *
 * @param {Function} callback - passed through unchanged to the super constructor.
 */
function RssHandler (callback) {
    var feedOptions = { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false };
    RssHandler.super_.call(this, callback, feedOptions);
}
inherits(RssHandler, DefaultHandler);
417
+
418
+ RssHandler.prototype.done = function RssHandler$done () {
419
+ var feed = { };
420
+ var feedRoot;
421
+
422
+ var found = DomUtils.getElementsByTagName(function (value) { return(value == "rss" || value == "feed"); }, this.dom, false);
423
+ if (found.length) {
424
+ feedRoot = found[0];
425
+ }
426
+ if (feedRoot) {
427
+ if (feedRoot.name == "rss") {
428
+ feed.type = "rss";
429
+ feedRoot = feedRoot.children[0]; //<channel/>
430
+ feed.id = "";
431
+ try {
432
+ feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
433
+ } catch (ex) { }
434
+ try {
435
+ feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].children[0].data;
436
+ } catch (ex) { }
437
+ try {
438
+ feed.description = DomUtils.getElementsByTagName("description", feedRoot.children, false)[0].children[0].data;
439
+ } catch (ex) { }
440
+ try {
441
+ feed.updated = new Date(DomUtils.getElementsByTagName("lastBuildDate", feedRoot.children, false)[0].children[0].data);
442
+ } catch (ex) { }
443
+ try {
444
+ feed.author = DomUtils.getElementsByTagName("managingEditor", feedRoot.children, false)[0].children[0].data;
445
+ } catch (ex) { }
446
+ feed.items = [];
447
+ DomUtils.getElementsByTagName("item", feedRoot.children).forEach(function (item, index, list) {
448
+ var entry = {};
449
+ try {
450
+ entry.id = DomUtils.getElementsByTagName("guid", item.children, false)[0].children[0].data;
451
+ } catch (ex) { }
452
+ try {
453
+ entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
454
+ } catch (ex) { }
455
+ try {
456
+ entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].children[0].data;
457
+ } catch (ex) { }
458
+ try {
459
+ entry.description = DomUtils.getElementsByTagName("description", item.children, false)[0].children[0].data;
460
+ } catch (ex) { }
461
+ try {
462
+ entry.pubDate = new Date(DomUtils.getElementsByTagName("pubDate", item.children, false)[0].children[0].data);
463
+ } catch (ex) { }
464
+ feed.items.push(entry);
465
+ });
466
+ } else {
467
+ feed.type = "atom";
468
+ try {
469
+ feed.id = DomUtils.getElementsByTagName("id", feedRoot.children, false)[0].children[0].data;
470
+ } catch (ex) { }
471
+ try {
472
+ feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
473
+ } catch (ex) { }
474
+ try {
475
+ feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].attribs.href;
476
+ } catch (ex) { }
477
+ try {
478
+ feed.description = DomUtils.getElementsByTagName("subtitle", feedRoot.children, false)[0].children[0].data;
479
+ } catch (ex) { }
480
+ try {
481
+ feed.updated = new Date(DomUtils.getElementsByTagName("updated", feedRoot.children, false)[0].children[0].data);
482
+ } catch (ex) { }
483
+ try {
484
+ feed.author = DomUtils.getElementsByTagName("email", feedRoot.children, true)[0].children[0].data;
485
+ } catch (ex) { }
486
+ feed.items = [];
487
+ DomUtils.getElementsByTagName("entry", feedRoot.children).forEach(function (item, index, list) {
488
+ var entry = {};
489
+ try {
490
+ entry.id = DomUtils.getElementsByTagName("id", item.children, false)[0].children[0].data;
491
+ } catch (ex) { }
492
+ try {
493
+ entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
494
+ } catch (ex) { }
495
+ try {
496
+ entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].attribs.href;
497
+ } catch (ex) { }
498
+ try {
499
+ entry.description = DomUtils.getElementsByTagName("summary", item.children, false)[0].children[0].data;
500
+ } catch (ex) { }
501
+ try {
502
+ entry.pubDate = new Date(DomUtils.getElementsByTagName("updated", item.children, false)[0].children[0].data);
503
+ } catch (ex) { }
504
+ feed.items.push(entry);
505
+ });
506
+ }
507
+
508
+ this.dom = feed;
509
+ }
510
+ RssHandler.super_.prototype.done.call(this);
511
+ }
512
+
513
+ ///////////////////////////////////////////////////
514
+
515
/**
 * Handler that assembles the elements written to it into a hierarchical
 * structure (this.dom) and reports the result through an optional callback.
 *
 * @param {Function} [callback] Called as callback(error, dom) when done() or error() is invoked
 * @param {Object} [options] Behaviour switches:
 *   ignoreWhitespace (default false) - drop whitespace-only text nodes
 *   verbose (default true) - keep the data property of tags and the raw property of all elements
 *   enforceEmptyTags (default true) - never give children to the tags listed in DefaultHandler._emptyTags
 */
function DefaultHandler (callback, options) {
	this.reset();
	//Work on a private copy so that filling in the defaults below never
	//modifies the object owned by the caller
	this._options = { };
	for (var key in options)
		this._options[key] = options[key];
	if (this._options.ignoreWhitespace == undefined)
		this._options.ignoreWhitespace = false; //Keep whitespace-only text nodes
	if (this._options.verbose == undefined)
		this._options.verbose = true; //Keep data property for tags and raw property for all
	if (this._options.enforceEmptyTags == undefined)
		this._options.enforceEmptyTags = true; //Don't allow children for HTML tags defined as empty in spec
	if ((typeof callback) == "function")
		this._callback = callback;
}

//**"Static"**//
//HTML Tags that shouldn't contain child nodes
DefaultHandler._emptyTags = {
	  area: 1
	, base: 1
	, basefont: 1
	, br: 1
	, col: 1
	, frame: 1
	, hr: 1
	, img: 1
	, input: 1
	, isindex: 1
	, link: 1
	, meta: 1
	, param: 1
	, embed: 1
};
//Regex to detect whitespace only text nodes
DefaultHandler.reWhitespace = /^\s*$/;

//**Public**//
//Properties//
DefaultHandler.prototype.dom = null; //The hierarchical object containing the parsed HTML
//Methods//
//Resets the handler back to starting state
DefaultHandler.prototype.reset = function DefaultHandler$reset() {
	this.dom = [];
	this._done = false;
	this._tagStack = [];
	//Convenience accessor: the innermost open element, or null when none is open
	this._tagStack.last = function DefaultHandler$_tagStack$last () {
		return(this.length ? this[this.length - 1] : null);
	};
};
//Signals the handler that parsing is done
DefaultHandler.prototype.done = function DefaultHandler$done () {
	this._done = true;
	this.handleCallback(null);
};
//Adds a tag element to the structure
DefaultHandler.prototype.writeTag = function DefaultHandler$writeTag (element) {
	this.handleElement(element);
};
//Adds a text element, unless it is whitespace-only and ignoreWhitespace is set
DefaultHandler.prototype.writeText = function DefaultHandler$writeText (element) {
	if (this._options.ignoreWhitespace && DefaultHandler.reWhitespace.test(element.data))
		return;
	this.handleElement(element);
};
//Adds a comment element to the structure
DefaultHandler.prototype.writeComment = function DefaultHandler$writeComment (element) {
	this.handleElement(element);
};
//Adds a directive element to the structure
DefaultHandler.prototype.writeDirective = function DefaultHandler$writeDirective (element) {
	this.handleElement(element);
};
//Reports an error to the callback (or throws it when there is no callback)
DefaultHandler.prototype.error = function DefaultHandler$error (error) {
	this.handleCallback(error);
};

//**Private**//
//Properties//
DefaultHandler.prototype._options = null; //Handler options for how to behave
DefaultHandler.prototype._callback = null; //Callback to respond to when parsing done
DefaultHandler.prototype._done = false; //Flag indicating whether handler has been notified of parsing completed
DefaultHandler.prototype._tagStack = null; //List of parents to the currently element being processed
//Methods//
//Passes (error, dom) to the callback. Without a callback an error is thrown
//and a successful completion is silently ignored.
DefaultHandler.prototype.handleCallback = function DefaultHandler$handleCallback (error) {
	if ((typeof this._callback) != "function") {
		if (error)
			throw error;
		return;
	}
	this._callback(error, this.dom);
};
//Places an element in the structure: at the top level when no element is
//open, otherwise as a child of the innermost open element. Closing tags
//(names starting with "/") are never stored; they only close open elements.
DefaultHandler.prototype.handleElement = function DefaultHandler$handleElement (element) {
	if (this._done) {
		//Report the misuse and stop: the element must not be stored, and this.dom
		//may have been replaced by a subclass in done()
		this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()"));
		return;
	}
	if (!this._options.verbose) {
//		element.raw = null; //FIXME: Not clean
		//FIXME: Serious performance problem using delete
		delete element.raw;
		if (element.type == "tag" || element.type == "script" || element.type == "style")
			delete element.data;
	}

	var enforceEmptyTags = this._options.enforceEmptyTags;
	//Everything except text, comments and directives may act as a container
	var isContainer = element.type != ElementType.Text && element.type != ElementType.Comment && element.type != ElementType.Directive;
	var parent = this._tagStack.last();

	if (!parent) { //There are no parent elements
		if (!isContainer) { //Just add to the top level list
			this.dom.push(element);
			return;
		}
		if (element.name.charAt(0) == "/") //Ignore closing tags that obviously don't have an opening tag
			return;
		this.dom.push(element);
		if (!enforceEmptyTags || !DefaultHandler._emptyTags[element.name]) //Don't add tags to the tag stack that can't have children
			this._tagStack.push(element);
		return;
	}

	if (isContainer && element.name.charAt(0) == "/") {
		//This is a closing tag, scan the tagStack to find the matching opening tag
		//and pop the stack up to the opening tag's parent
		var baseName = element.name.substring(1);
		if (enforceEmptyTags && DefaultHandler._emptyTags[baseName])
			return; //Empty tags are never on the stack, so there is nothing to close
		var pos = this._tagStack.length - 1;
		while (pos > -1 && this._tagStack[pos].name != baseName)
			pos--;
		if (pos > -1) //Unmatched closing tags leave the stack untouched
			while (this._tagStack.length > pos)
				this._tagStack.pop();
		return;
	}

	//Add the element as a child of the element on top of the tag stack
	if (!parent.children)
		parent.children = [];
	parent.children.push(element);
	if (isContainer && (!enforceEmptyTags || !DefaultHandler._emptyTags[element.name])) //Don't add tags to the tag stack that can't have children
		this._tagStack.push(element);
};
655
+
656
//Helpers for searching the structure produced by the handlers
var DomUtils = {
	/**
	 * Checks a single element against a set of test functions.
	 *
	 * Recognised keys: "tag_name" (tested against element.name; only tag, script
	 * and style elements can pass), "tag_type" (tested against element.type) and
	 * "tag_contains" (tested against element.data; only text, comment and
	 * directive elements can pass). Any other key is tested against the
	 * attribute of the same name. All tests must pass.
	 *
	 * @param {Object} options Map of key to test function
	 * @param {Object} element Element to check
	 * @returns {boolean} true when the element passes every test
	 */
	  testElement: function DomUtils$testElement (options, element) {
		if (!element) {
			return false;
		}

		for (var key in options) {
			if (key == "tag_name") {
				if (element.type != "tag" && element.type != "script" && element.type != "style") {
					return false;
				}
				if (!options["tag_name"](element.name)) {
					return false;
				}
			} else if (key == "tag_type") {
				if (!options["tag_type"](element.type)) {
					return false;
				}
			} else if (key == "tag_contains") {
				if (element.type != "text" && element.type != "comment" && element.type != "directive") {
					return false;
				}
				if (!options["tag_contains"](element.data)) {
					return false;
				}
			} else {
				if (!element.attribs || !options[key](element.attribs[key])) {
					return false;
				}
			}
		}

		return true;
	}

	/**
	 * Collects, in document order, the elements that pass every test in options.
	 *
	 * @param {Object} options Map of key to test function or to a plain value
	 *   (plain values are compared with ==). The object itself is not modified.
	 * @param {Object|Array} currentElement Element or list of elements to search
	 * @param {boolean} [recurse=true] Whether to descend into children
	 * @param {number} [limit] Maximum number of elements to return; a negative or
	 *   non-numeric value means no limit, 0 yields an empty result
	 * @returns {Array} The matching elements, never more than limit
	 */
	, getElements: function DomUtils$getElements (options, currentElement, recurse, limit) {
		recurse = (recurse === undefined || recurse === null) || !!recurse;
		limit = parseInt(limit, 10);
		if (isNaN(limit)) {
			limit = -1;
		}

		if (!currentElement) {
			return([]);
		}

		function getTest (checkVal) {
			return(function (value) { return(value == checkVal); });
		}
		//Build the test functions once, in a private map, so the caller's
		//options object keeps its original values
		var tests = {};
		for (var key in options) {
			tests[key] = ((typeof options[key]) == "function") ? options[key] : getTest(options[key]);
		}

		var found = [];

		function limitReached () {
			return(limit >= 0 && found.length >= limit);
		}

		//Depth-first walk sharing a single result list, so the limit applies
		//to the total number of matches rather than to each subtree
		function collect (element) {
			if (!element || limitReached()) {
				return;
			}
			if (DomUtils.testElement(tests, element)) {
				found.push(element);
				if (limitReached()) {
					return;
				}
			}

			var elementList;
			if (recurse && element.children) {
				elementList = element.children;
			} else if (element instanceof Array) {
				elementList = element;
			} else {
				return;
			}

			for (var i = 0; i < elementList.length && !limitReached(); i++) {
				collect(elementList[i]);
			}
		}

		collect(currentElement);
		return(found);
	}

	//Returns the first element whose id attribute equals id, or null
	, getElementById: function DomUtils$getElementById (id, currentElement, recurse) {
		var result = DomUtils.getElements({ id: id }, currentElement, recurse, 1);
		return(result.length ? result[0] : null);
	}

	//Returns the tag, script and style elements whose name matches name (value or test function)
	, getElementsByTagName: function DomUtils$getElementsByTagName (name, currentElement, recurse, limit) {
		return(DomUtils.getElements({ tag_name: name }, currentElement, recurse, limit));
	}

	//Returns the elements whose type matches type (value or test function)
	, getElementsByTagType: function DomUtils$getElementsByTagType (type, currentElement, recurse, limit) {
		return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
	}
};
750
+
751
/**
 * Makes ctor inherit from superCtor: instances of ctor get superCtor.prototype
 * in their prototype chain and the parent constructor is exposed as ctor.super_.
 * superCtor itself is not invoked while the link is set up.
 *
 * @param {Function} ctor Constructor that should inherit
 * @param {Function} superCtor Constructor to inherit from
 */
function inherits (ctor, superCtor) {
	ctor.super_ = superCtor;
	ctor.prototype = Object.create(superCtor.prototype, {
		//Non-enumerable, so "constructor" does not show up when
		//iterating over the properties of an instance
		constructor: {
			  value: ctor
			, enumerable: false
			, writable: true
			, configurable: true
		}
	});
}
758
+
759
//Public API: everything listed here is published on the exports object
//under the same name
var publicApi = {
	  Parser: Parser
	, DefaultHandler: DefaultHandler
	, RssHandler: RssHandler
	, ElementType: ElementType
	, DomUtils: DomUtils
};
for (var exportName in publicApi) {
	exports[exportName] = publicApi[exportName];
}
768
+
769
+ })();