liquid_cms 0.3.0.1 → 0.3.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +5 -1
- data/Gemfile.lock +1 -1
- data/README.rdoc +5 -1
- data/app/helpers/cms/common_helper.rb +1 -0
- data/app/views/cms/pages/_page.html.erb +2 -1
- data/app/views/layouts/cms.html.erb +2 -1
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/LICENSE +2 -2
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/css/csscolors.css +12 -8
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/css/docs.css +123 -29
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/csstest.html +1 -1
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/htmltest.html +1 -1
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/index.html +232 -179
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/codemirror.js +211 -65
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/editor.js +360 -194
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/mirrorframe.js +1 -1
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/parsecss.js +11 -7
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/parsejavascript.js +14 -5
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/parsesparql.js +1 -1
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/select.js +140 -87
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/stringstream.js +5 -0
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/tokenizejavascript.js +1 -1
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/js/undo.js +7 -7
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/manual.html +148 -52
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/story.html +631 -614
- data/lib/generators/liquid_cms/templates/public/cms/stylesheets/styles.css +7 -7
- data/lib/liquid_cms/version.rb +1 -1
- metadata +4 -26
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/lua/LICENSE +0 -32
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/lua/css/luacolors.css +0 -63
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/lua/index.html +0 -68
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/lua/js/parselua.js +0 -253
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/php/LICENSE +0 -37
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/php/css/phpcolors.css +0 -114
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/php/index.html +0 -292
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/php/js/parsephp.js +0 -371
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/php/js/parsephphtmlmixed.js +0 -90
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/php/js/tokenizephp.js +0 -1006
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/plsql/LICENSE +0 -22
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/plsql/css/plsqlcolors.css +0 -57
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/plsql/index.html +0 -67
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/plsql/js/parseplsql.js +0 -233
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/python/LICENSE +0 -32
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/python/css/pythoncolors.css +0 -58
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/python/index.html +0 -141
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/python/js/parsepython.js +0 -542
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/sql/LICENSE +0 -22
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/sql/css/sqlcolors.css +0 -57
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/sql/index.html +0 -56
- data/lib/generators/liquid_cms/templates/public/cms/codemirror/contrib/sql/js/parsesql.js +0 -211
@@ -1,297 +1,272 @@
|
|
1
|
-
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
2
|
+
<html>
|
2
3
|
<head>
|
3
|
-
<title>Implementing a
|
4
|
-
<
|
5
|
-
|
6
|
-
padding: 3em 6em;
|
7
|
-
max-width: 50em;
|
8
|
-
}
|
9
|
-
h1 {
|
10
|
-
text-align: center;
|
11
|
-
margin: 0;
|
12
|
-
}
|
13
|
-
h2 {
|
14
|
-
font-size: 130%;
|
15
|
-
}
|
16
|
-
code {
|
17
|
-
font-family: courier, monospace;
|
18
|
-
font-size: 80%;
|
19
|
-
color: #144;
|
20
|
-
}
|
21
|
-
p {
|
22
|
-
margin: 1em 0;
|
23
|
-
}
|
24
|
-
pre.code {
|
25
|
-
min-width: 55em;
|
26
|
-
margin: 1.1em 12px;
|
27
|
-
border: 1px solid #CCCCCC;
|
28
|
-
padding: .4em;
|
29
|
-
font-family: courier, monospace;
|
30
|
-
}
|
31
|
-
</style>
|
4
|
+
<title>Implementing a Syntax-Highlighting JavaScript Editor In JavaScript</title>
|
5
|
+
<link rel="stylesheet" type="text/css" href="css/docs.css"/>
|
6
|
+
<link rel="stylesheet" type="text/css" href="http://fonts.googleapis.com/css?family=Droid+Sans|Droid+Sans+Mono|Droid+Sans:bold"/>
|
32
7
|
<link rel="stylesheet" type="text/css" href="css/jscolors.css"/>
|
8
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
|
33
9
|
</head>
|
34
10
|
<body>
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
<
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
<
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
<
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
<span class="js-keyword">var</span> <span class="js-variabledef">cc</span> = <span class="js-keyword">function</span>(){<span class="js-variable">scanNode</span>(<span class="js-localvariable">start</span>, <span class="js-variable">stop</span>);};
|
11
|
+
|
12
|
+
<h1 style="letter-spacing: -2px">Implementing a Syntax-Highlighting JavaScript Editor In JavaScript</h1>
|
13
|
+
|
14
|
+
<pre class="grey">
|
15
|
+
/* A brutal odyssey to the dark
|
16
|
+
side of the DOM tree */
|
17
|
+
</pre>
|
18
|
+
|
19
|
+
<div class="clear"><div class="leftbig blk">
|
20
|
+
|
21
|
+
<p>In one of his (very informative) <a
|
22
|
+
href="http://www.learnwebdesignonline.com/videos/programming/javascript/yahoo-douglas-crockford">video
|
23
|
+
lectures</a>, Douglas Crockford remarks that writing JavaScript
|
24
|
+
for the web is 'programming in a hostile environment'. I had done
|
25
|
+
my fair share of weird workarounds, and even occasionally gave up
|
26
|
+
on an idea entirely because browsers just wouldn't support it, but
|
27
|
+
before this project I never really realized just how powerless a
|
28
|
+
programmer can be in the face of buggy, incompatible, and poorly
|
29
|
+
designed platforms.</p>
|
30
|
+
|
31
|
+
<p>The plan was not ridiculously ambitious. I wanted to 'enhance' a
|
32
|
+
textarea to the point where writing code in it is pleasant. This meant
|
33
|
+
automatic indentation and, if possible at all, syntax highlighting.</p>
|
34
|
+
|
35
|
+
<p>In this document I describe the story of implementing this, for your
|
36
|
+
education and amusement. A demonstration of the resulting program,
|
37
|
+
along with the source code, can be found at <a
|
38
|
+
href="http://codemirror.net/">the project website</a>.</p>
|
39
|
+
|
40
|
+
<p style="color: #811; font-size: 90%; font-style: italic">Note:
|
41
|
+
some of the details given here no longer apply to the current <a
|
42
|
+
href="http://codemirror.net/">CodeMirror</a> codebase, which has
|
43
|
+
evolved quite a bit in the meantime.</p>
|
44
|
+
|
45
|
+
<h2 id="indent">Take one: Only indentation</h2>
|
46
|
+
|
47
|
+
<p>The very first attempt merely added auto-indentation to a textarea
|
48
|
+
element. It would scan backwards through the content of the area,
|
49
|
+
starting from the cursor, until it had enough information to decide
|
50
|
+
how to indent the current line. It took me a while to figure out a
|
51
|
+
decent model for indenting JavaScript code, but in the end this seems
|
52
|
+
to work:</p>
|
53
|
+
|
54
|
+
<ul>
|
55
|
+
<li>Code that sits inside a block is indented one unit (generally two
|
56
|
+
spaces) more than the statement or brace that opened the block.</li>
|
57
|
+
<li>A statement that is continued to the next line is indented one unit
|
58
|
+
more than the line that starts the statement.</li>
|
59
|
+
<li>When dealing with lists of arguments or the content of array and
|
60
|
+
object literals there are two possible models. If there is any text
|
61
|
+
directly after the opening brace, bracket, or parenthesis,
|
62
|
+
subsequent lines are aligned with this opening character. If the
|
63
|
+
opening character is followed by a newline (optionally with whitespace
|
64
|
+
or comments before it), the next line is indented one unit further
|
65
|
+
than the line that started the list.</li>
|
66
|
+
<li>And, obviously, if a statement follows another statement it is
|
67
|
+
indented the same amount as the one before it.</li>
|
68
|
+
</ul>
|
69
|
+
|
70
|
+
<p>When scanning backwards through code one has to take string values,
|
71
|
+
comments, and regular expressions (which are delimited by slashes)
|
72
|
+
into account, because braces and semicolons and such are not
|
73
|
+
significant when they appear inside them. Single-line ('//') comments
|
74
|
+
turned out to be rather inefficient to check for when doing a
|
75
|
+
backwards scan, since every time you encounter a newline you have to
|
76
|
+
go on to the next newline to determine whether this line ends in a
|
77
|
+
comment or not. Regular expressions are even worse ― without
|
78
|
+
contextual information they are impossible to distinguish from the
|
79
|
+
division operator, and I didn't get them working in this first
|
80
|
+
version.</p>
|
81
|
+
|
82
|
+
<p>To find out which line to indent, and to make sure that adding or
|
83
|
+
removing whitespace doesn't cause the cursor to jump in strange ways,
|
84
|
+
it is necessary to determine which text the user has selected. Even
|
85
|
+
though I was working with just a simple textarea at this point, this
|
86
|
+
was already a bit of a headache.</p>
|
87
|
+
|
88
|
+
<p>On W3C-standards-respecting browsers, textarea nodes have
|
89
|
+
<code>selectionStart</code> and <code>selectionEnd</code>
|
90
|
+
properties which nicely give you the number of characters before
|
91
|
+
the start and end of the selection. Great!</p>
|
92
|
+
|
93
|
+
<p>Then, there is Internet Explorer. Internet Explorer also has an API
|
94
|
+
for looking at and manipulating selections. It gives you information
|
95
|
+
such as a detailed map of the space the selected lines take up on the
|
96
|
+
screen, in pixels, and of course the text inside the selection. It
|
97
|
+
does, however, not give you much of a clue on where the selection is
|
98
|
+
located in the document.</p>
|
99
|
+
|
100
|
+
<p>After some experimentation I managed to work out an elaborate
|
101
|
+
method for getting something similar to the
|
102
|
+
<code>selectionStart</code> and <code>selectionEnd</code> values
|
103
|
+
in other browsers. It worked like this:</p>
|
104
|
+
|
105
|
+
<ul>
|
106
|
+
<li>Get the <code>TextRange</code> object corresponding to the selection.</li>
|
107
|
+
<li>Record the length of the text inside it.</li>
|
108
|
+
<li>Make another <code>TextRange</code> that covers the whole textarea element.</li>
|
109
|
+
<li>Set the start of the first <code>TextRange</code> to the start of the second one.</li>
|
110
|
+
<li>Again get the length of the text in the first object.</li>
|
111
|
+
<li>Now <code>selectionEnd</code> is the second length, and <code>selectionStart</code> is
|
112
|
+
the second minus the first one.</li>
|
113
|
+
</ul>
|
114
|
+
|
115
|
+
<p>That seemed to work, but when resetting the selection after modifying
|
116
|
+
the content of the textarea I ran into another interesting feature of
|
117
|
+
these <code>TextRange</code>s: You can move their endpoints by a given number of
|
118
|
+
characters, which is useful when trying to set a cursor at the Nth
|
119
|
+
character of a textarea, but in this context, newlines are <em>not</em>
|
120
|
+
considered to be characters, so you'll always end up one character too
|
121
|
+
far for every newline you passed. Of course, you can count newlines
|
122
|
+
and compensate for this (though it is still not possible to position
|
123
|
+
the cursor right in front of a newline). Sheesh.</p>
|
124
|
+
|
125
|
+
<p>After ragging on Internet Explorer for a while, let us move on and rag
|
126
|
+
on Firefox a bit. It turns out that, in Firefox, getting and setting
|
127
|
+
the text content of a DOM element is inexplicably expensive,
|
128
|
+
especially when there is a lot of text involved. As soon as I tried to
|
129
|
+
use my indentation code to indent itself (some 400 lines), I found
|
130
|
+
myself waiting for over four seconds every time I pressed enter. That
|
131
|
+
seemed a little slow.</p>
|
132
|
+
|
133
|
+
<h2 id="designmode">designMode it is</h2>
|
134
|
+
|
135
|
+
<p>The solution was obvious: Since the text inside a textarea can only be
|
136
|
+
manipulated as one single big string, I had to spread it out over
|
137
|
+
multiple nodes. How do you spread editable content over multiple
|
138
|
+
nodes? Right! <code>designMode</code> or <code>contentEditable</code>.</p>
|
139
|
+
|
140
|
+
<p>Now I wasn't entirely naive about <code>designMode</code>, I had been looking
|
141
|
+
into writing a non-messy WYSIWYG editor before, and at that time I had
|
142
|
+
concluded two things:</p>
|
143
|
+
|
144
|
+
<ul>
|
145
|
+
<li>It is impossible to prevent the user from inserting whichever HTML
|
146
|
+
junk he wants into the document.</li>
|
147
|
+
<li>In Internet Explorer, it is extremely hard to get a good view
|
148
|
+
on what nodes the user has selected.</li>
|
149
|
+
</ul>
|
150
|
+
|
151
|
+
<p>Basically, the good folks at Microsoft designed a really bad interface
|
152
|
+
for putting editable documents in pages, and the other browsers, not
|
153
|
+
wanting to be left behind, more or less copied that. And there isn't
|
154
|
+
much hope for a better way to do this appearing anytime soon. Wise
|
155
|
+
people probably use a Flash movie or (God forbid) a Java applet for
|
156
|
+
these kinds of things, though those are not without drawbacks either.</p>
|
157
|
+
|
158
|
+
<p>Anyway, seeing how using an editable document would also make syntax
|
159
|
+
highlighting possible, I foolishly went ahead. There is something
|
160
|
+
perversely fascinating about trying to build a complicated system on a
|
161
|
+
lousy, unsuitable platform.</p>
|
162
|
+
|
163
|
+
<h2 id="parser">A parser</h2>
|
164
|
+
|
165
|
+
<p>How does one do decent syntax highlighting? A very simple scanning can
|
166
|
+
tell the difference between strings, comments, keywords, and other
|
167
|
+
code. But this time I wanted to actually be able to recognize regular
|
168
|
+
expressions, so that I didn't have any blatant incorrect behaviour
|
169
|
+
anymore.</p>
|
170
|
+
|
171
|
+
<p>That brought me to the idea of doing a serious parse on the code. This
|
172
|
+
would not only make detecting regular expressions much easier, it
|
173
|
+
would also give me detailed information about the code, which can be
|
174
|
+
used to determine proper indentation levels, and to make subtle
|
175
|
+
distinctions in colouring, for example the difference between variable
|
176
|
+
names and property names.</p>
|
177
|
+
|
178
|
+
<p>And hey, when we're parsing the whole thing, it would even be possible
|
179
|
+
to make a distinction between local and global variables, and colour
|
180
|
+
them differently. If you've ever programmed JavaScript you can
|
181
|
+
probably imagine how useful this would be ― it is ridiculously easy
|
182
|
+
to accidentally create global instead of local variables. I don't
|
183
|
+
consider myself a JavaScript rookie anymore, but it was (embarrassingly
|
184
|
+
enough) only this week that I realized that my habit of typing <code>for
|
185
|
+
(name in object) ...</code> was creating a global variable <code>name</code>, and that
|
186
|
+
I should be typing <code>for (var name in object) ...</code> instead.</p>
|
187
|
+
|
188
|
+
<p>Re-parsing all the code the user has typed in every time he hits a key
|
189
|
+
is obviously not feasible. So how does one combine on-the-fly
|
190
|
+
highlighting with a serious parser? One option would be to split the
|
191
|
+
code into top-level statements (functions, variable definitions, etc.)
|
192
|
+
and parse these separately. This is horribly clunky though, especially
|
193
|
+
considering the fact that modern JavaScripters often put all the code
|
194
|
+
in a file in a single big object or function to prevent namespace
|
195
|
+
pollution.</p>
|
196
|
+
|
197
|
+
<p>I have always liked continuation-passing style and generators. So the
|
198
|
+
idea I came up with is this: An interruptible, resumable parser. This
|
199
|
+
is a parser that does not run through a whole document at once, but
|
200
|
+
parses on-demand, a little bit at a time. At any moment you can create
|
201
|
+
a copy of its current state, which can be resumed later. You start
|
202
|
+
parsing at the top of the code, and keep going as long as you like,
|
203
|
+
but throughout the document, for example at every end of line, you
|
204
|
+
store a copy of the current parser state. Later on, when line 106
|
205
|
+
changes, you grab the interrupted parser that was stored at the end of
|
206
|
+
line 105, and use it to re-parse line 106. It still knows exactly what
|
207
|
+
the context was at that point, which local variables were defined,
|
208
|
+
which unfinished statements were encountered, and so on.</p>
|
209
|
+
|
210
|
+
<p>But that, unfortunately, turned out to be not quite as easy as it
|
211
|
+
sounds.</p>
|
212
|
+
|
213
|
+
<h2 id="dom">The DOM nodes underfoot</h2>
|
214
|
+
|
215
|
+
<p>Of course, when working inside an editable frame we don't just
|
216
|
+
have to deal with text. The code will be represented by some kind
|
217
|
+
of DOM tree. My first idea was to set the <code>white-space:
|
218
|
+
pre</code> style for the frame and try to work with mostly text,
|
219
|
+
with the occasional coloured <code>span</code> element. It turned
|
220
|
+
out that support for <code>white-space: pre</code> in browsers,
|
221
|
+
especially in editable frames, is so hopelessly glitchy that this
|
222
|
+
was unworkable.</p>
|
223
|
+
|
224
|
+
<p>Next I tried a series of <code>div</code> elements, one per
|
225
|
+
line, with <code>span</code> elements inside them. This seemed to
|
226
|
+
nicely reflect the structure of the code in a shallowly
|
227
|
+
hierarchical way. I soon realized, however, that my code would be
|
228
|
+
much more straightforward when using no hierarchy whatsoever
|
229
|
+
― a series of <code>span</code>s, with <code>br</code> tags
|
230
|
+
at the end of every line. This way, the DOM nodes form a flat
|
231
|
+
sequence that corresponds to the sequence of the text ―
|
232
|
+
just extract text from <code>span</code> nodes and substitute
|
233
|
+
newlines for <code>br</code> nodes.</p>
|
234
|
+
|
235
|
+
<p>It would be a shame if the editor would fall apart as soon as
|
236
|
+
someone pastes some complicated HTML into it. I wanted it to be
|
237
|
+
able to deal with whatever mess it finds. This means using some
|
238
|
+
kind of HTML-normalizer that takes arbitrary HTML and flattens it
|
239
|
+
into a series of <code>br</code>s and <code>span</code> elements
|
240
|
+
that contain a single text node. Just like the parsing process, it
|
241
|
+
would be best if this did not have to be done to the entire buffer
|
242
|
+
every time something changes.</p>
|
243
|
+
|
244
|
+
<p>It took some banging my head against my keyboard, but I found a very
|
245
|
+
nice way to model this. It makes heavy use of generators, for which I
|
246
|
+
used <a href="http://www.mochikit.com">MochiKit</a>'s iterator
|
247
|
+
framework. Bob Ippolito explains the concepts in this library very
|
248
|
+
well in his <a
|
249
|
+
href="http://bob.pythonmac.org/archives/2005/07/06/iteration-in-javascript/">blog
|
250
|
+
post</a> about it. (Also notice some of the dismissive comments at the
|
251
|
+
bottom of that post. They say "I don't think I really want to learn
|
252
|
+
this, so I'll make up some silly reason to condemn it.")</p>
|
253
|
+
|
254
|
+
<p>The highlighting process consists of the following elements:
|
255
|
+
normalizing the DOM tree, extracting the text from the DOM tree,
|
256
|
+
tokenizing this text, parsing the tokens, and finally adjusting the
|
257
|
+
DOM nodes to reflect the structure of the code.</p>
|
258
|
+
|
259
|
+
<p>The first two, I put into a single generator. It scans the DOM
|
260
|
+
tree, fixing anything that is not a simple top-level
|
261
|
+
<code>span</code> or <code>br</code>, and it produces the text
|
262
|
+
content of the nodes (or a newline in case of a <code>br</code>)
|
263
|
+
as its output ― each time it is called, it yields a string.
|
264
|
+
Continuation passing style was a good way to model this process in
|
265
|
+
an iterator, which has to be processed one step at a time. Look at
|
266
|
+
this simplified version:</p>
|
267
|
+
|
268
|
+
<pre class="code" style="width: 110%"><span class="js-keyword">function</span> <span class="js-variable">traverseDOM</span>(<span class="js-variabledef">start</span>){
|
269
|
+
<span class="js-keyword">var</span> <span class="js-variabledef">cc</span> = <span class="js-keyword">function</span>(){<span class="js-keyword">return</span> <span class="js-variable">scanNode</span>(<span class="js-localvariable">start</span>, <span class="js-variable">stop</span>);};
|
295
270
|
<span class="js-keyword">function</span> <span class="js-variabledef">stop</span>(){
|
296
271
|
<span class="js-localvariable">cc</span> = <span class="js-localvariable">stop</span>;
|
297
272
|
<span class="js-keyword">throw</span> <span class="js-variable">StopIteration</span>;
|
@@ -300,157 +275,165 @@
|
|
300
275
|
<span class="js-localvariable">cc</span> = <span class="js-localvariable">c</span>;
|
301
276
|
<span class="js-keyword">return</span> <span class="js-localvariable">value</span>;
|
302
277
|
}
|
303
|
-
|
278
|
+
|
304
279
|
<span class="js-keyword">function</span> <span class="js-variabledef">scanNode</span>(<span class="js-variabledef">node</span>, <span class="js-variabledef">c</span>){
|
305
280
|
<span class="js-keyword">if</span> (<span class="js-localvariable">node</span>.<span class="js-property">nextSibling</span>)
|
306
|
-
<span class="js-keyword">var</span> <span class="js-variabledef">nextc</span> = <span class="js-keyword">function</span>(){<span class="js-localvariable">scanNode</span>(<span class="js-localvariable">node</span>.<span class="js-property">nextSibling</span>, <span class="js-localvariable">c</span>);};
|
307
|
-
<span class="js-keyword">else</span>
|
281
|
+
<span class="js-keyword">var</span> <span class="js-variabledef">nextc</span> = <span class="js-keyword">function</span>(){<span class="js-keyword">return</span> <span class="js-localvariable">scanNode</span>(<span class="js-localvariable">node</span>.<span class="js-property">nextSibling</span>, <span class="js-localvariable">c</span>);};
|
282
|
+
<span class="js-keyword">else</span>
|
308
283
|
<span class="js-keyword">var</span> <span class="js-variabledef">nextc</span> = <span class="js-localvariable">c</span>;
|
309
|
-
|
284
|
+
|
310
285
|
<span class="js-keyword">if</span> (<span class="js-comment">/* node is proper span element */</span>)
|
311
286
|
<span class="js-keyword">return</span> <span class="js-localvariable">yield</span>(<span class="js-localvariable">node</span>.<span class="js-property">firstChild</span>.<span class="js-property">nodeValue</span>, <span class="js-localvariable">nextc</span>);
|
312
287
|
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-comment">/* node is proper br element */</span>)
|
313
288
|
<span class="js-keyword">return</span> <span class="js-localvariable">yield</span>(<span class="js-string">"\n"</span>, <span class="js-localvariable">nextc</span>);
|
314
|
-
<span class="js-keyword">else</span>
|
289
|
+
<span class="js-keyword">else</span>
|
315
290
|
<span class="js-comment">/* flatten node, yield its textual content */</span>;
|
316
291
|
}
|
317
|
-
|
292
|
+
|
318
293
|
<span class="js-keyword">return</span> {<span class="js-property">next</span>: <span class="js-keyword">function</span>(){<span class="js-keyword">return</span> <span class="js-localvariable">cc</span>();}};
|
319
|
-
}</pre>
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
<pre class="code"><span class="js-keyword">function</span> <span class="js-variable">expression</span>(<span class="js-variabledef">type</span>){
|
404
|
-
<span class="js-keyword">if</span> (<span class="js-localvariable">type</span> in <span class="js-variable">atomicTypes</span>)
|
405
|
-
|
406
|
-
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"
|
407
|
-
|
408
|
-
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"
|
409
|
-
|
410
|
-
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"
|
294
|
+
}</pre>
|
295
|
+
|
296
|
+
<p>The variable <code>c</code> stands for 'continuation', and <code>cc</code> for 'current
|
297
|
+
continuation' ― that last variable is used to store the function to
|
298
|
+
continue with, when yielding a value to the outside world. Every time
|
299
|
+
control leaves this function, it has to make sure that <code>cc</code> is set to
|
300
|
+
a suitable value, which is what <code>yield</code> and <code>stop</code> take care of.</p>
|
301
|
+
|
302
|
+
<p>The object that is returned contains a <code>next</code> method, which is
|
303
|
+
MochiKit's idea of an iterator, and the initial continuation just
|
304
|
+
throws a <code>StopIteration</code>, which is how MochiKit signals that an
|
305
|
+
iterator has reached its end.</p>
|
306
|
+
|
307
|
+
<p>The first lines of <code>scanNode</code> extend the continuation with the task of
|
308
|
+
scanning the next node, if there is a next node. The rest of the
|
309
|
+
function decides what kind of value to <code>yield</code>. Note that this is a
|
310
|
+
rather trivial example of this technique, since the process of going
|
311
|
+
through these nodes is basically linear (it was much, much more
|
312
|
+
complex in earlier versions), but still the trick with the
|
313
|
+
continuations makes the code shorter and, for those in the know,
|
314
|
+
clearer than the equivalent 'storing the iterator state in variables'
|
315
|
+
approach.</p>
|
316
|
+
|
317
|
+
<p>The next iterator that the input passes through is the
|
318
|
+
tokenizer. Well, actually, there is another iterator in between
|
319
|
+
that isolates the tokenizer from the fact that the DOM traversal
|
320
|
+
yields a bunch of separate strings, and presents them as a single
|
321
|
+
character stream (with a convenient <code>peek</code> operation),
|
322
|
+
but this is not a very interesting one. What the tokenizer returns
|
323
|
+
is a stream of token objects, each of which has a
|
324
|
+
<code>value</code>, its textual content, a <code>type</code>, like
|
325
|
+
<code>"variable"</code>, <code>"operator"</code>, or just itself,
|
326
|
+
<code>"{"</code> for example, in the case of significant
|
327
|
+
punctuation or special keywords. They also have a
|
328
|
+
<code>style</code>, which is used later by the highlighter to give
|
329
|
+
their <code>span</code> elements a class name (the parser will
|
330
|
+
still adjust this in some cases).</p>
|
331
|
+
|
332
|
+
<p>At first I assumed the parser would have to talk back to the
|
333
|
+
tokenizer about the current context, in order to be able to
|
334
|
+
distinguish those accursed regular expressions from divisions, but
|
335
|
+
it seems that regular expressions are only allowed if the previous
|
336
|
+
(non-whitespace, non-comment) token was either an operator, a
|
337
|
+
keyword like <code>new</code> or <code>throw</code>, or a specific
|
338
|
+
kind of punctuation (<code>"[{}(,;:"</code>) that indicates a new
|
339
|
+
expression can be started here. This made things considerably
|
340
|
+
easier, since the 'regexp or no regexp' question could stay
|
341
|
+
entirely within the tokenizer.</p>
|
342
|
+
|
343
|
+
<p>The next step, then, is the parser. It does not do a very
|
344
|
+
thorough job because, firstly, it has to be fast, and secondly, it
|
345
|
+
should not go to pieces when fed an incorrect program. So only
|
346
|
+
superficial constructs are recognized, keywords that resemble each
|
347
|
+
other in syntax, such as <code>while</code> and <code>if</code>,
|
348
|
+
are treated in precisely the same way, as are <code>try</code> and
|
349
|
+
<code>else</code> ― the parser doesn't mind if an
|
350
|
+
<code>else</code> appears without an <code>if</code>. Stuff that
|
351
|
+
binds variables, <code>var</code>, <code>function</code>, and
|
352
|
+
<code>catch</code> to be precise, is treated with more care,
|
353
|
+
because the parser wants to know about local variables.</p>
|
354
|
+
|
355
|
+
<p>Inside the parser, three kinds of context are stored. Firstly, a set
|
356
|
+
of known local variables, which is used to adjust the style of
|
357
|
+
variable tokens. Every time the parser enters a function, a new set of
|
358
|
+
variables is created. If there was already such a set (entering an
|
359
|
+
inner function), a pointer to the old one is stored in the new one. At
|
360
|
+
the end of the function, the current variable set is 'popped' off and
|
361
|
+
the previous one is restored.</p>
|
362
|
+
|
363
|
+
<p>The second kind of context is the lexical context, this keeps track of
|
364
|
+
whether we are inside a statement, block, or list. Like the variable
|
365
|
+
context, it also forms a stack of contexts, with each one containing a
|
366
|
+
pointer to the previous ones so that they can be popped off again when
|
367
|
+
they are finished. This information is used for indentation. Every
|
368
|
+
time the parser encounters a newline token, it attaches the current
|
369
|
+
lexical context and a 'copy' of itself (more about that later) to this
|
370
|
+
token.</p>
|
371
|
+
|
372
|
+
<p>The third context is a continuation context. This parser does not use
|
373
|
+
straight continuation style, instead it uses a stack of actions that
|
374
|
+
have to be performed. These actions are simple functions, a kind of
|
375
|
+
minilanguage, they act on tokens, and decide what kind of new actions
|
376
|
+
should be pushed onto the stack. Here are some examples:</p>
|
377
|
+
|
378
|
+
<pre class="code" style="width: 110%"><span class="js-keyword">function</span> <span class="js-variable">expression</span>(<span class="js-variabledef">type</span>){
|
379
|
+
<span class="js-keyword">if</span> (<span class="js-localvariable">type</span> in <span class="js-variable">atomicTypes</span>)
|
380
|
+
<span class="js-variable">cont</span>(<span class="js-variable">maybeoperator</span>);
|
381
|
+
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"function"</span>)
|
382
|
+
<span class="js-variable">cont</span>(<span class="js-variable">functiondef</span>);
|
383
|
+
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"("</span>)
|
384
|
+
<span class="js-variable">cont</span>(<span class="js-variable">pushlex</span>(<span class="js-string">"list"</span>), <span class="js-variable">expression</span>, <span class="js-variable">expect</span>(<span class="js-string">")"</span>), <span class="js-variable">poplex</span>);
|
385
|
+
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"operator"</span>)
|
386
|
+
<span class="js-variable">cont</span>(<span class="js-variable">expression</span>);
|
387
|
+
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"["</span>)
|
388
|
+
<span class="js-variable">cont</span>(<span class="js-variable">pushlex</span>(<span class="js-string">"list"</span>), <span class="js-variable">commasep</span>(<span class="js-variable">expression</span>), <span class="js-variable">expect</span>(<span class="js-string">"]"</span>), <span class="js-variable">poplex</span>);
|
389
|
+
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"{"</span>)
|
390
|
+
<span class="js-variable">cont</span>(<span class="js-variable">pushlex</span>(<span class="js-string">"list"</span>), <span class="js-variable">commasep</span>(<span class="js-variable">objprop</span>), <span class="js-variable">expect</span>(<span class="js-string">"}"</span>), <span class="js-variable">poplex</span>);
|
391
|
+
<span class="js-keyword">else</span> <span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"keyword c"</span>)
|
392
|
+
<span class="js-variable">cont</span>(<span class="js-variable">expression</span>);
|
411
393
|
}
|
412
|
-
|
394
|
+
|
413
395
|
<span class="js-keyword">function</span> <span class="js-variable">block</span>(<span class="js-variabledef">type</span>){
|
414
396
|
<span class="js-keyword">if</span> (<span class="js-localvariable">type</span> == <span class="js-string">"}"</span>) <span class="js-variable">cont</span>();
|
415
397
|
<span class="js-keyword">else</span> <span class="js-variable">pass</span>(<span class="js-variable">statement</span>, <span class="js-variable">block</span>);
|
416
398
|
}</pre>
|
417
399
|
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
<pre class="code"><span class="js-keyword">function</span> <span class="js-variable">copy</span>(){
|
452
|
-
<span class="js-keyword">var</span> <span class="js-variabledef">_context</span> = <span class="js-variable">context</span>, <span class="js-variabledef">_lexical</span> = <span class="js-variable">lexical</span>,
|
453
|
-
|
400
|
+
<p>The function <code>cont</code> (for continue) will push the actions it is given
|
401
|
+
onto the stack (in reverse order, so that the first one will be popped
|
402
|
+
first). Actions such as <code>pushlex</code> and <code>poplex</code> merely adjust the
|
403
|
+
lexical environment, while others, such as <code>expression</code> itself, do
|
404
|
+
actual parsing. <code>pass</code>, as seen in <code>block</code>, is similar to <code>cont</code>, but
|
405
|
+
it does not 'consume' the current token, so the next action will again
|
406
|
+
see this same token. In <code>block</code>, this happens when the function
|
407
|
+
determines that we are not at the end of the block yet, so it pushes
|
408
|
+
the <code>statement</code> function which will interpret the current token as the
|
409
|
+
start of a statement.</p>
|
410
|
+
|
411
|
+
<p>These actions are called by a 'driver' function, which filters out the
|
412
|
+
whitespace and comments, so that the parser actions do not have to
|
413
|
+
think about those, and keeps track of some things like the indentation
|
414
|
+
of the current line and the column at which the current token ends,
|
415
|
+
which are stored in the lexical context and used for indentation.
|
416
|
+
After calling an action, if the action called <code>cont</code>, this driver
|
417
|
+
function will return the current token, if <code>pass</code> (or nothing) was
|
418
|
+
called, it will immediately continue with the next action.</p>
|
419
|
+
|
420
|
+
<p>This goes to show that it is viable to write a quite elaborate
|
421
|
+
minilanguage in a macro-less language like JavaScript. I don't think
|
422
|
+
it would be possible to do something like this without closures (or
|
423
|
+
similarly powerful abstraction) though, I've certainly never seen
|
424
|
+
anything like it in Java code.</p>
|
425
|
+
|
426
|
+
<p>The way a 'copy' of the parser was produced shows a nice usage
|
427
|
+
of closures. Like with the DOM transformer shown above, most of
|
428
|
+
the local state of the parser is held in a closure produced by
|
429
|
+
calling <code>parse(stream)</code>. The function
|
430
|
+
<code>copy</code>, which is local to the parser function, produces
|
431
|
+
a new closure, with copies of all the relevant variables:</p>
|
432
|
+
|
433
|
+
<pre class="code"><span class="js-keyword">function</span> <span class="js-variable">copy</span>(){
|
434
|
+
<span class="js-keyword">var</span> <span class="js-variabledef">_context</span> = <span class="js-variable">context</span>, <span class="js-variabledef">_lexical</span> = <span class="js-variable">lexical</span>,
|
435
|
+
<span class="js-variabledef">_actions</span> = <span class="js-variable">copyArray</span>(<span class="js-variable">actions</span>);
|
436
|
+
|
454
437
|
<span class="js-keyword">return</span> <span class="js-keyword">function</span>(<span class="js-variabledef">_tokens</span>){
|
455
438
|
<span class="js-variable">context</span> = <span class="js-localvariable">_context</span>;
|
456
439
|
<span class="js-variable">lexical</span> = <span class="js-localvariable">_lexical</span>;
|
@@ -460,195 +443,229 @@
|
|
460
443
|
};
|
461
444
|
}</pre>
|
462
445
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
446
|
+
<p>Where <code>parser</code> is the object that contains the <code>next</code> (driver)
|
447
|
+
function, and a reference to this <code>copy</code> function. When the function
|
448
|
+
that <code>copy</code> produces is called with a token stream as argument, it
|
449
|
+
updates the local variables in the parser closure, and returns the
|
450
|
+
corresponding iterator object.</p>
|
451
|
+
|
452
|
+
<p>Moving on, we get to the last stop in this chain of generators, the
|
453
|
+
actual highlighter. You can view this one as taking two streams as
|
454
|
+
input, on the one hand there is the stream of tokens from the parser,
|
455
|
+
and on the other hand there is the DOM tree as left by the DOM
|
456
|
+
transformer. If everything went correctly, these two should be
|
457
|
+
synchronized. The highlighter can look at the current token, see if
|
458
|
+
the <code>span</code> in the DOM tree corresponds to it (has the same text
|
459
|
+
content, and the correct class), and if not it can chop up the DOM
|
460
|
+
nodes to conform to the tokens.</p>
|
461
|
+
|
462
|
+
<p>Every time the parser yields a newline token, the highlighter
|
463
|
+
encounters a <code>br</code> element in the DOM stream. It takes the copy of the
|
464
|
+
parser and the lexical context from this token and attaches them to
|
465
|
+
the DOM node. This way, a new highlighting process can be started from
|
466
|
+
that node by re-starting the copy of the parser with a new token
|
467
|
+
stream, which reads tokens from the DOM nodes starting at that <code>br</code>
|
468
|
+
element, and the indentation code can use the lexical context
|
469
|
+
information to determine the correct indentation at that point.</p>
|
470
|
+
|
471
|
+
<h2 id="selection">Selection woes</h2>
|
472
|
+
|
473
|
+
<p>All the above can be done using the DOM interface that all major
|
474
|
+
browsers have in common, and which is relatively free of weird bugs
|
475
|
+
and aberrations. However, when the user is typing in new code, this
|
476
|
+
must also be highlighted. For this to happen, the program must know
|
477
|
+
where the cursor currently is, and because it mucks up the DOM tree,
|
478
|
+
it has to restore this cursor position after doing the highlighting.</p>
|
479
|
+
|
480
|
+
<p>Re-highlighting always happens per line, because the copy of the
|
481
|
+
parser is stored only at the end of lines. Doing this every time the
|
482
|
+
user presses a key is terribly slow and obnoxious, so what I did was
|
483
|
+
keep a list of 'dirty' nodes, and as soon as the user didn't type
|
484
|
+
anything for 300 milliseconds the program starts re-highlighting these
|
485
|
+
nodes. If it finds more than ten lines must be re-parsed, it does only
|
486
|
+
ten and waits another 300 milliseconds before it continues, this way
|
487
|
+
the browser never freezes up entirely.</p>
|
488
|
+
|
489
|
+
<p>As mentioned earlier, Internet Explorer's selection model is not the
|
490
|
+
most practical one. My attempts to build a wrapper that makes it look
|
491
|
+
like the W3C model all stranded. In the end I came to the conclusion
|
492
|
+
that I only needed two operations:</p>
|
493
|
+
|
494
|
+
<ul>
|
495
|
+
<li>Creating a selection 'snapshot' that can be restored after
|
496
|
+
highlighting, in such a way that it still works if some of the nodes
|
497
|
+
that were selected are replaced by other nodes with the same
|
498
|
+
size but a different structure.</li>
|
499
|
+
<li>Finding the top-level node around or before the cursor, to mark it
|
500
|
+
dirty or to insert indentation whitespace at the start of that line.</li>
|
501
|
+
</ul>
|
502
|
+
|
503
|
+
<p>It turns out that the pixel-based selection model that Internet
|
504
|
+
Explorer uses, which always seemed completely ludicrous to me, is
|
505
|
+
perfect for the first case. Since the DOM transformation (generally)
|
506
|
+
does not change the position of things, storing the pixel offsets of
|
507
|
+
the selection makes it possible to restore that same selection, never
|
508
|
+
mind what happened to the underlying DOM structure.</p>
|
509
|
+
|
510
|
+
<p>[Later addition: Note that this, due to the very random design
|
511
|
+
of the <a
|
512
|
+
href="http://msdn2.microsoft.com/en-us/library/ms535872(VS.85).aspx#">TextRange
|
513
|
+
interface</a>, only really works when the whole selection falls
|
514
|
+
within the visible part of the document.]</p>
|
515
|
+
|
516
|
+
<p>Doing the same with the W3C selection model is a lot harder. What I
|
517
|
+
ended up with was this:</p>
|
518
|
+
|
519
|
+
<ul>
|
520
|
+
<li>Create an object pointing to the nodes at the start and end of the
|
521
|
+
selection, and the offset within those nodes. This is basically the
|
522
|
+
information that the <code>Range</code> object gives you.</li>
|
523
|
+
<li>Make references from these nodes back to that object.</li>
|
524
|
+
<li>When replacing (part of) a node with another one, check for such a
|
525
|
+
reference, and when it is present, check whether this new node will
|
526
|
+
get the selection. If it does, move the reference from the old to the
|
527
|
+
new node, if it does not, adjust the offset in the selection object to
|
528
|
+
reflect the fact that part of the old node has been replaced.</li>
|
529
|
+
</ul>
|
530
|
+
|
531
|
+
<p>Now in the second case (getting the top-level node at the
|
532
|
+
cursor) the Internet Explorer cheat does not work. In the W3C
|
533
|
+
model this is rather easy, you have to do some creative parent-
|
534
|
+
and sibling-pointer following to arrive at the correct top-level
|
535
|
+
node, but nothing weird. In Internet Explorer, all we have to go
|
536
|
+
on is the <code>parentElement</code> method on a
|
537
|
+
<code>TextRange</code>, which gives the first element that
|
538
|
+
completely envelops the selection. If the cursor is inside a text
|
539
|
+
node, this is good, that text node tells us where we are. If the
|
540
|
+
cursor is between nodes, for example between two <code>br</code>
|
541
|
+
nodes, you get the top-level node itself back, which is remarkably
|
542
|
+
useless. In cases like this I stoop to a rather ugly hack (which
|
543
|
+
fortunately turned out to be acceptably fast) ― I create a
|
544
|
+
temporary empty <code>span</code> with an ID inside the selection,
|
545
|
+
get a reference to this <code>span</code> by ID, take its
|
546
|
+
<code>previousSibling</code>, and remove it again.</p>
|
547
|
+
|
548
|
+
<p>Unfortunately, Opera's selection implementation is buggy, and it
|
549
|
+
will give wildly incorrect <code>Range</code> objects when the cursor
|
550
|
+
is between two nodes. This is a bit of a showstopper, and until I find
|
551
|
+
a workaround for that or it gets fixed, the highlighter doesn't work
|
552
|
+
properly in Opera.</p>
|
553
|
+
|
554
|
+
<p>Also, when one presses enter in a <code>designMode</code>
|
555
|
+
document in Firefox or Opera, a <code>br</code> tag is inserted.
|
556
|
+
In Internet Explorer, pressing enter causes some maniacal gnome to
|
557
|
+
come out and start wrapping all the content before and after the
|
558
|
+
cursor in <code>p</code> tags. I suppose there is something to be
|
559
|
+
said for that, in principle, though if you saw the tag soup of
|
560
|
+
<code>font</code>s and nested paragraphs Internet Explorer
|
561
|
+
generates you would soon enough forget all about principle.
|
562
|
+
Anyway, getting unwanted <code>p</code> tags slowed the
|
563
|
+
highlighter down terribly ― it had to overhaul the whole
|
564
|
+
DOM tree to remove them again, every time the user pressed enter.
|
565
|
+
Fortunately I could fix this by capturing the enter presses and
|
566
|
+
manually inserting a <code>br</code> tag at the cursor.</p>
|
567
|
+
|
568
|
+
<p>On the subject of Internet Explorer's tag soup, here is an interesting
|
569
|
+
anecdote: One time, when testing the effect that modifying the content
|
570
|
+
of a selection had, I inspected the DOM tree and found a <code>"/B"</code>
|
571
|
+
element. This was not a closing tag, there are no closing tags in the
|
572
|
+
DOM tree, just elements. The <code>nodeName</code> of this element was actually
|
573
|
+
<code>"/B"</code>. That was when I gave up any notions of ever understanding the
|
574
|
+
profound mystery that is Internet Explorer.</p>
|
575
|
+
|
576
|
+
<h2 id="closing">Closing thoughts</h2>
|
577
|
+
|
578
|
+
<p>Well, I despaired at times, but I did end up with a working JavaScript
|
579
|
+
editor. I did not keep track of the amount of time I wasted on this,
|
580
|
+
but I would estimate it to be around fifty hours. Finding workarounds
|
581
|
+
for browser bugs can be a terribly nonlinear process. I just spent
|
582
|
+
half a day working on a weird glitch in Firefox that caused the cursor
|
583
|
+
in the editable frame to be displayed 3/4 line too high when it was at
|
584
|
+
the very end of the document. Then I found out that setting the
|
585
|
+
style.display of the iframe to "block" fixed this (why not?). I'm
|
586
|
+
amazed how often issues that seem hopeless do turn out to be
|
587
|
+
avoidable, even if it takes hours of screwing around and some truly
|
588
|
+
non-obvious ideas.</p>
|
589
|
+
|
590
|
+
<p>For a lot of things, JavaScript + DOM elements are a surprisingly
|
591
|
+
powerful platform. Simple interactive documents and forms can be
|
592
|
+
written in browsers with very little effort, generally less than with
|
593
|
+
most 'traditional' platforms (Java, Win32, things like WxWidgets).
|
594
|
+
Libraries like Dojo (and a similar monster I once wrote myself) even
|
595
|
+
make complex, composite widgets workable. However, when applications
|
596
|
+
go sufficiently beyond the things that browsers were designed for, the
|
597
|
+
available APIs do not give enough control, are nonstandard and buggy,
|
598
|
+
and are often poorly designed. Because of this, writing such
|
599
|
+
applications, when it is even possible, is a <em>painful</em> process.</p>
|
600
|
+
|
601
|
+
<p>And who likes pain? Sure, when finding that crazy workaround,
|
602
|
+
subduing the damn browser, and getting everything to work, there
|
603
|
+
is a certain macho thrill. But one can't help wondering how much
|
604
|
+
easier things like preventing the user from pasting pictures in
|
605
|
+
his source code would be on another platform. Maybe something like
|
606
|
+
Silverlight or whatever other new browser plugin gizmos people are
|
607
|
+
pushing these days will become the way to solve things like this
|
608
|
+
in the future. But, personally, I would prefer for those browser
|
609
|
+
companies to put some real effort into things like cleaning up and
|
610
|
+
standardising shady things like <code>designMode</code>, fixing
|
611
|
+
their bugs, and getting serious about ECMAScript 4.</p>
|
612
|
+
|
613
|
+
<p>Which is probably not realistically going to happen anytime soon.</p>
|
614
|
+
|
615
|
+
<hr/>
|
616
|
+
|
617
|
+
<p>Some interesting projects similar to this:</p>
|
618
|
+
|
619
|
+
<ul>
|
620
|
+
<li><a href="http://www.ymacs.org">Ymacs</a></li>
|
621
|
+
<li><a href="http://gpl.internetconnection.net/vi/">vi clone</a></li>
|
622
|
+
<li><a href="http://robrohan.com/projects/9ne/">Emacs clone</a></li>
|
623
|
+
<li><a href="http://codepress.sourceforge.net/">CodePress</a></li>
|
624
|
+
<li><a href="http://www.codeide.com">CodeIDE</a></li>
|
625
|
+
<li><a href="http://www.cdolivet.net/editarea">EditArea</a></li>
|
626
|
+
</ul>
|
627
|
+
|
628
|
+
<hr/>
|
629
|
+
|
630
|
+
<p>If you have any remarks, criticism, or hints related to the
|
631
|
+
above, drop me an e-mail at <a
|
632
|
+
href="mailto:marijnh@gmail.com">marijnh@gmail.com</a>. If you say
|
633
|
+
something generally interesting, I'll include your reaction here
|
634
|
+
at the bottom of this page.</p>
|
635
|
+
|
636
|
+
</div><div class="rightsmall blk">
|
637
|
+
|
638
|
+
<p style="font-size: 80%">
|
639
|
+
<b>Topic</b>: JavaScript, advanced browser weirdness, cool programming techniques<br/>
|
640
|
+
<b>Audience</b>: Programmers, especially JavaScript programmers<br/>
|
641
|
+
<b>Author</b>: Marijn Haverbeke<br/>
|
642
|
+
<b>Date</b>: May 24th 2007
|
643
|
+
</p>
|
644
|
+
|
645
|
+
<h2>Contents</h2>
|
646
|
+
|
647
|
+
<ul>
|
648
|
+
<li><a href="#indent">Only Indentation</a></li>
|
649
|
+
<li><a href="#designmode"><code>designMode</code></a></li>
|
650
|
+
<li><a href="#parser">A Parser</a></li>
|
651
|
+
<li><a href="#dom">DOM Nodes</a></li>
|
652
|
+
<li><a href="#selection">Selection Woes</a></li>
|
653
|
+
<li><a href="#closing">Closing Thoughts</a></li>
|
654
|
+
</ul>
|
655
|
+
|
656
|
+
<h2>Site</h2>
|
657
|
+
|
658
|
+
<ul>
|
659
|
+
<li><a href="index.html">Front Page</a></li>
|
660
|
+
<li><a href="manual.html">User Manual</a></li>
|
661
|
+
<li><a href="faq.html">FAQ</a></li>
|
662
|
+
<li><a href="http://groups.google.com/group/codemirror">Google Group</a></li>
|
663
|
+
<li><a href="compress.html">Compression Helper</a></li>
|
664
|
+
</ul>
|
665
|
+
|
666
|
+
</div></div>
|
667
|
+
|
668
|
+
<div style="height: 2em"> </div>
|
652
669
|
|
653
670
|
</body>
|
654
671
|
</html>
|