rack-seo 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ require 'summarize'
2
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
+ describe "RackSeo Configuration" do
4
+ before do # runs before every example in this describe block
5
+ @env = Rack::MockRequest.env_for '/' # mock Rack environment for a request to the root path
6
+ end
7
+
8
+ it "reads a configuration file specified in the initializer" do # NOTE(review): no explicit expectation; this example can only fail if the constructor raises
9
+ @rack_seo = Rack::RackSeo::Base.new Apps.complex, :public => Fixtures.path, :config => "config/rack_seo.default.yml" # passes an explicit :config path to the middleware
10
+ end
11
+
12
+ context "happy config file" do # compares a page processed with happy.yml against one processed without a :config option
13
+ before do
14
+ @rack_seo = Rack::RackSeo::Base.new Apps.complex, :public => Fixtures.path, :config => "spec/sample_configs/happy.yml" # middleware under test, configured from the happy sample file
15
+ @happy_page = Fixtures.complex
16
+ @rack_seo.execute! @happy_page # the examples inspect @happy_page afterwards, so execute! presumably modifies it in place; confirm in Base
17
+
18
+ @rack_seo_default = Rack::RackSeo::Base.new Apps.complex, :public => Fixtures.path # baseline middleware built without a :config option
19
+ @default_page = Fixtures.complex_copy # presumably an independent copy of the complex fixture; verify in spec_helper
20
+ @rack_seo_default.execute! @default_page
21
+ end
22
+ it "allows the title text to be configured" do # expects the title to contain the page's h1 text followed by " - Happy happy"
23
+ @happy_page.title_content.should include "#{(@happy_page.at_css('h1').text)} - Happy happy"
24
+ end
25
+ it "allows the meta description text source material to be narrowed down by a selector" do # only asserts that the description differs from the baseline page
26
+ @happy_page.at_css("meta[name='description']").attr('content').should_not == @default_page.at_css("meta[name='description']").attr('content')
27
+ end
28
+ it "allows the meta keywords source material to be narrowed down by a selector" do # only asserts that the keywords differ from the baseline page
29
+ @happy_page.at_css("meta[name='keywords']").attr('content').should_not == @default_page.at_css("meta[name='keywords']").attr('content')
30
+ end
31
+ end
32
+
33
+ context "sad config file" do # exercises the middleware with the sad.yml sample config
34
+ before do
35
+ @rack_seo = Rack::RackSeo::Base.new Apps.simple, :public => Fixtures.path, :config => "spec/sample_configs/sad.yml"
36
+ @sad_page = Fixtures.simple
37
+ @rack_seo.execute! @sad_page # runs during setup, so an exception here would fail every example in this context
38
+ end
39
+ it "fails gracefully with a bad title, description or keyword selector" do
40
+ inner_app = lambda { |env| [200, {'Content-Type' => 'text/html'}, [@sad_page.to_html]] } # NOTE(review): assigned but never used; the request below goes through Apps.simple
41
+ status, headers, body = @rack_seo.call(@env) # @env is the root-path mock environment built in the outer before block
42
+ status.should == 200
43
+ # TODO: double check this test; it only asserts the response status, not the generated title or meta tags
44
+ end
45
+ end
46
+
47
+ context "configuring formats based on paths" do
48
+ before do # builds the baseline response that the nested contexts compare against
49
+ @page = Fixtures.complex # NOTE(review): not referenced anywhere else in the visible spec
50
+ @rack_seo = Rack::RackSeo::Base.new Apps.complex, :public => Fixtures.path # baseline middleware built without a :config option
51
+ @env = Rack::MockRequest.env_for '/'
52
+ status, headers, body = @rack_seo.call(@env)
53
+ @response_body = Rack::RackSeo::Document.new(body.first) # wraps the first body chunk so the examples can query title/description/keywords
54
+
55
+ @page_test = Fixtures.complex_copy # NOTE(review): not referenced anywhere else in the visible spec
56
+ @rack_seo_test = Rack::RackSeo::Base.new Apps.complex, :public => Fixtures.path, :config => "spec/sample_configs/custom_paths.yml" # middleware under test, using the custom_paths sample config
57
+ end
58
+
59
+ context "matching path based on a string" do # requests '/test-path' through the custom_paths middleware and compares with the '/' baseline
60
+ before do
61
+ env_test = Rack::MockRequest.env_for '/test-path'
62
+ status, headers, body = @rack_seo_test.call(env_test)
63
+ @response_body_test = Rack::RackSeo::Document.new(body.first) # response for the path-specific request
64
+ end
65
+
66
+ it "allows title_format to be configured for a certain path" do # only asserts that the title tag differs from the baseline
67
+ @response_body_test.title_tag.should_not == @response_body.title_tag
68
+ end
69
+ it "allows meta_description_selector to be configured for a certain path" do # only asserts that the description differs from the baseline
70
+ @response_body_test.description_content.should_not == @response_body.description_content
71
+ end
72
+ it "allows meta_keywords_selector to be configured for a certain path" do # only asserts that the keywords differ from the baseline
73
+ @response_body_test.keywords_content.should_not == @response_body.keywords_content
74
+ end
75
+ end
76
+
77
+ context "matching path based on a Regexp" do # requests '/test-regex-two/subfolder' through the custom_paths middleware and compares with the '/' baseline
78
+ before do
79
+ env_test = Rack::MockRequest.env_for '/test-regex-two/subfolder'
80
+ status, headers, body = @rack_seo_test.call(env_test)
81
+ @response_body_test = Rack::RackSeo::Document.new(body.first) # response for the regexp-matched request
82
+ end
83
+
84
+ it "allows title_format to be configured for a certain path" do
85
+ @response_body_test.title_tag.should_not == @response_body.title_tag
86
+ @response_body_test.title_content.should include('regex') # presumably the regexp entry in custom_paths.yml puts 'regex' into the title format; verify against the config
87
+ end
88
+ it "allows meta_description_selector to be configured for a certain path" do # only asserts that the description differs from the baseline
89
+ @response_body_test.description_content.should_not == @response_body.description_content
90
+ end
91
+ it "allows meta_keywords_selector to be configured for a certain path" do # only asserts that the keywords differ from the baseline
92
+ @response_body_test.keywords_content.should_not == @response_body.keywords_content
93
+ end
94
+ end
95
+
96
+ end
97
+ end
@@ -0,0 +1,437 @@
1
+ <!doctype html>
2
+ <html>
3
+ <head>
4
+ <!-- 1140px Grid styles for IE -->
5
+ <!--[if lte IE 9]><link rel="stylesheet" href="/css/ie.css" type="text/css" media="screen" /><![endif]-->
6
+ <link rel="stylesheet" href="/css/1140.css" type="text/css" media="screen" />
7
+ <link rel="stylesheet" href="/css/main.css" type="text/css" media="screen" />
8
+ <link rel="alternate" type="application/atom+xml" title="Never create Ruby strings longer than 23 characters - feed" href="http://feeds2.feedburner.com/patshaughnessy" />
9
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
10
+ <title>Never create Ruby strings longer than 23 characters - Pat Shaughnessy</title>
11
+ </head>
12
+ <body>
13
+ <div id="banner">
14
+ <div class="row">
15
+ <div class="onecol"></div>
16
+ <div class="elevencol last">
17
+ <a href="/">
18
+ <span id="title">
19
+ Pat Shaughnessy
20
+ </span>
21
+ <span id="tagline">
22
+ blogger, rubyist, aspiring author
23
+ </span>
24
+ </a>
25
+ </div>
26
+ </div>
27
+ </div>
28
+ <div id="container">
29
+ <div class="row">
30
+ <div class="onecol"></div>
31
+
32
+ <div class="ninecol white">
33
+
34
+ <article class="post">
35
+ <header>
36
+ <h1>Never create Ruby strings longer than 23 characters</h1>
37
+ <div class="metadata">
38
+ <span class="date">January 4th 2012</span>&nbsp;&mdash;&nbsp;<a href="#disqus_thread" data-disqus-identifier="http://patshaughnessy.net/2012/1/4/never-create-ruby-strings-longer-than-23-characters" class="date">&nbsp; Comments and &nbsp; Reactions</a>&nbsp;&mdash;&nbsp;<a href="/tags/ruby-internals" class="tag">more on Ruby internals</a><br/>
39
+
40
+ <a href="https://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-via="pat_shaughnessy" data-text="Never create Ruby strings longer than 23 characters">Tweet</a><script type="text/javascript" src="//platform.twitter.com/widgets.js"></script>
41
+
42
+ </div>
43
+ </header>
44
+
45
+ <section class="content" id="content">
46
+ <div style="float: left; padding: 7px 30px 10px 0px">
47
+ <table cellpadding="0" cellspacing="0" border="0">
48
+ <tr><td><img src="http://patshaughnessy.net/assets/2012/1/4/microscope.jpg"></td></tr>
49
+ <tr><td align="center"><small><i>Looking at things through a microscope<br/>sometimes leads to surprising discoveries</i></small></td></tr>
50
+ </table>
51
+ </div>
52
+
53
+
54
+ <p>Obviously this is an utterly preposterous statement: it’s hard to think of a more ridiculous and esoteric coding requirement. I can just imagine all sorts of amusing conversations with designers and business sponsors: “No&hellip; the size of this &lt;input&gt; field should be 23&hellip; 24 is just too long!” Or: “We need to explain to users that their subject lines should be less than 23 letters&hellip;” Or: “Twitter got it all wrong&hellip; the 140 limit should have been 23!”</p>
55
+
56
+ <p>Why in the world would I even imagine saying this? As silly as this requirement might be, there is actually a grain of truth behind it: creating shorter Ruby strings is actually much faster than creating longer ones. It turns out that this line of Ruby code:</p>
57
+
58
+ <div class="CodeRay">
59
+ <div class="code"><pre>
60
+ str = <span class="s"><span class="dl">&quot;</span><span class="k">1234567890123456789012</span><span class="dl">&quot;</span></span> + <span class="s"><span class="dl">&quot;</span><span class="k">x</span><span class="dl">&quot;</span></span>
61
+ </pre></div>
62
+ </div>
63
+
64
+
65
+
66
+ <p>&hellip; is executed about twice as fast by the MRI 1.9.3 Ruby interpreter as this line of Ruby code:</p>
67
+
68
+ <div class="CodeRay">
69
+ <div class="code"><pre>
70
+ str = <span class="s"><span class="dl">&quot;</span><span class="k">12345678901234567890123</span><span class="dl">&quot;</span></span> + <span class="s"><span class="dl">&quot;</span><span class="k">x</span><span class="dl">&quot;</span></span>
71
+ </pre></div>
72
+ </div>
73
+
74
+
75
+
76
+ <p>Huh? What’s the difference? These two lines look identical! Well, the difference is that the first line creates a new string containing 23 characters, while the second line creates one with 24. It turns out that the MRI Ruby 1.9 interpreter is optimized to handle strings containing 23 characters or less more quickly than longer strings. This isn’t true for Ruby 1.8.</p>
77
+
78
+ <p>Today I’m going to take a close look at the MRI Ruby 1.9 interpreter to see how it actually handles saving string values&hellip; and why this is actually true.</p>
79
+
80
+ <h2>Not all strings are created equal</h2>
81
+
82
+ <p>Over the holidays I decided to read through the <a href="http://rhg.rubyforge.org/">Ruby Hacking Guide</a>. If you’ve never heard of it, it’s a great explanation of how the Ruby interpreter works internally. Unfortunately it’s written in Japanese, but a few of the chapters have been translated into English. <a href="http://rhg.rubyforge.org/chapter02.html">Chapter 2</a>, one of the translated chapters, was a great place to start since it explains all of the basic Ruby data types, including strings.</p>
83
+
84
+ <p>After reading through that, I decided to dive right into the MRI 1.9.3 C source code to learn more about how Ruby handles strings; since I use RVM, for me the Ruby source code is located under ~/.rvm/src/ruby-1.9.3-preview1. I started by looking at include/ruby/ruby.h, which defines all of the basic Ruby data types, and string.c, which implements Ruby String objects.</p>
85
+
86
+ <p>Reading the C code I discovered that Ruby actually uses three different types of string values, which I call:</p>
87
+
88
+ <ul>
89
+ <li>Heap Strings,</li>
90
+ <li>Shared Strings, and</li>
91
+ <li>Embedded Strings</li>
92
+ </ul>
93
+
94
+
95
+ <p>I found this fascinating! For years I’ve assumed every Ruby String object was like every other String object. But it turns out this is not true! Let’s take a closer look&hellip;</p>
96
+
97
+ <h2>Heap Strings</h2>
98
+
99
+ <p>The standard and most common way for Ruby to save string data is in the “heap.” The heap is a core concept of the C language: it’s a large pool of memory that C programmers can allocate from and use via a call to the <span class="code">malloc</span> function. For example, this line of C code allocates a 100 byte chunk of memory from the heap and saves its memory address into a pointer:</p>
100
+
101
+ <div class="CodeRay">
102
+ <div class="code"><pre>
103
+ <span class="pt">char</span> *ptr = malloc(<span class="i">100</span>);
104
+ </pre></div>
105
+ </div>
106
+
107
+
108
+
109
+ <p>Later, when the C programmer is done with this memory, she can release it and return it to the system using <span class="code">free</span>:</p>
110
+
111
+ <div class="CodeRay">
112
+ <div class="code"><pre>
113
+ free(ptr);
114
+ </pre></div>
115
+ </div>
116
+
117
+
118
+
119
+ <p>Avoiding the need to manage memory in this very manual and explicit way is one of the biggest benefits of using any high level programming language, such as Ruby, Java, C#, etc. When you create a string value in Ruby code like this, for example:</p>
120
+
121
+ <div class="CodeRay">
122
+ <div class="code"><pre>
123
+ str = <span class="s"><span class="dl">&quot;</span><span class="k">Lorem ipsum dolor sit amet, consectetur adipisicing elit</span><span class="dl">&quot;</span></span>
124
+ </pre></div>
125
+ </div>
126
+
127
+
128
+
129
+ <p>&hellip; the Ruby interpreter creates a structure called “RString” that conceptually looks like this:</p>
130
+
131
+ <p><img src="http://patshaughnessy.net/assets/2012/1/4/heap-string.png" alt="heap strings" /></p>
132
+
133
+ <p>You can see here that the RString structure contains two values: <span class="code">ptr</span> and <span class="code">len</span>, but not the actual string data itself. Ruby actually saves the string character values themselves in some memory allocated from the heap, and then sets <span class="code">ptr</span> to the location of that heap memory, and <span class="code">len</span> to the length of the string.</p>
134
+
135
+ <p>Here’s a simplified version of the C RString structure:</p>
136
+
137
+ <div class="CodeRay">
138
+ <div class="code"><pre>
139
+ <span class="r">struct</span> RString {
140
+ <span class="pt">long</span> len;
141
+ <span class="pt">char</span> *ptr;
142
+ };
143
+ </pre></div>
144
+ </div>
145
+
146
+
147
+
148
+ <p>I’ve simplified this a lot; there are actually a number of other values saved in this C struct. I’ll discuss some of them next, and others I’ll skip over for today. If you’re not familiar with C, you can think of <span class="code">struct</span> (short for “structure”) as an object that contains a set of instance variables, except in C there’s no object at all &ndash; a struct is just a chunk of memory containing a few values.</p>
149
+
150
+ <p>I refer to this type of Ruby string as “Heap String,” since the actual string data is saved in the heap.</p>
151
+
152
+ <h2>Shared Strings</h2>
153
+
154
+ <p>Another type of string value that the Ruby interpreter uses is called a “Shared String” in the Ruby C source code. You create a Shared String every time you write a line of Ruby code that copies one string to another, similar to this:</p>
155
+
156
+ <div class="CodeRay">
157
+ <div class="code"><pre>
158
+ str = <span class="s"><span class="dl">&quot;</span><span class="k">Lorem ipsum dolor sit amet, consectetur adipisicing elit</span><span class="dl">&quot;</span></span>
159
+ str2 = str
160
+ </pre></div>
161
+ </div>
162
+
163
+
164
+
165
+ <p>Here the Ruby interpreter has realized that you are assigning the same string value to two variables: str and str2. So in fact there’s no need to create two copies of the string data itself; instead Ruby creates two RString values that share the single copy of the string data. The way this works is that both RString structs contain the same ptr value to the shared data&hellip; meaning both strings contain the same value. There’s also a <span class="code">shared</span> value saved in the second RString struct that points to the first RString struct. There are some other details which I’m not showing here, such as some bit mask flags that Ruby uses to keep track of which RString’s are shared and which are not.</p>
166
+
167
+ <p><img src="http://patshaughnessy.net/assets/2012/1/4/shared-string.png" alt="shared strings" /></p>
168
+
169
+ <p>Aside from saving memory, this also speeds up execution of your Ruby programs dramatically by avoiding the need to allocate more memory from the heap using another call to <span class="code">malloc</span>. <span class="code">Malloc</span> is actually a fairly expensive operation: it takes time to track down available memory of the proper size in the heap, and also to keep track of it for freeing later.</p>
170
+
171
+ <p>Here’s a somewhat more accurate version of the C RString structure, including the <span class="code">shared</span> value:</p>
172
+
173
+ <div class="CodeRay">
174
+ <div class="code"><pre>
175
+ struct RString {
176
+ long len;
177
+ char *ptr;
178
+ VALUE shared;
179
+ };
180
+ </pre></div>
181
+ </div>
182
+
183
+
184
+
185
+ <p>Strings that are copied from one variable to another like this I call “Shared Strings.”</p>
186
+
187
+ <h2>Embedded Strings</h2>
188
+
189
+ <p>The third and last way that MRI Ruby 1.9 saves string data is by embedding the characters into the RString structure itself, like this:</p>
190
+
191
+ <div class="CodeRay">
192
+ <div class="code"><pre>
193
+ str3 = <span class="s"><span class="dl">&quot;</span><span class="k">Lorem ipsum dolor</span><span class="dl">&quot;</span></span>
194
+ </pre></div>
195
+ </div>
196
+
197
+
198
+
199
+ <p><img src="http://patshaughnessy.net/assets/2012/1/4/embedded-string.png" alt="embedded strings" /></p>
200
+
201
+ <p>This RString structure contains a character array called <span class="code">ary</span> and not the <span class="code">ptr</span>, <span class="code">len</span> and <span class="code">shared</span> values we saw above. Here’s another simplified definition of the same RString structure, this time containing the <span class="code">ary</span> character array:</p>
202
+
203
+ <div class="CodeRay">
204
+ <div class="code"><pre>
205
+ <span class="r">struct</span> RString {
206
+ <span class="pt">char</span> ary[RSTRING_EMBED_LEN_MAX + <span class="i">1</span>];
207
+ };
208
+ </pre></div>
209
+ </div>
210
+
211
+
212
+
213
+ <p>If you’re not familiar with C code, the syntax <span class="code">char ary[100]</span> creates an array of 100 characters (bytes). Unlike Ruby, C arrays are not objects; instead they are really just a collection of bytes. In C you have to specify the length of the array you want to create ahead of time.</p>
214
+
215
+ <p>How do Embedded Strings work? Well, the key is the size of the <span class="code">ary</span> array, which is set to <span class="code">RSTRING_EMBED_LEN_MAX+1</span>. If you’re running a 64-bit version of Ruby <span class="code">RSTRING_EMBED_LEN_MAX</span> is set to 23, so <span class="code">ary</span> holds 24 bytes. That means a short string like this will fit into the RString <span class="code">ary</span> array:</p>
216
+
217
+ <div class="CodeRay">
218
+ <div class="code"><pre>
219
+ str = <span class="s"><span class="dl">&quot;</span><span class="k">Lorem ipsum dolor</span><span class="dl">&quot;</span></span>
220
+ </pre></div>
221
+ </div>
222
+
223
+
224
+
225
+ <p>&hellip; while a longer string like this will not:</p>
226
+
227
+ <div class="CodeRay">
228
+ <div class="code"><pre>
229
+ str = <span class="s"><span class="dl">&quot;</span><span class="k">Lorem ipsum dolor sit amet, consectetur adipisicing elit</span><span class="dl">&quot;</span></span>
230
+ </pre></div>
231
+ </div>
232
+
233
+
234
+
235
+ <h2>How Ruby creates new string values</h2>
236
+
237
+ <p>Whenever you create a string value in your Ruby 1.9 code, the interpreter goes through an algorithm similar to this:</p>
238
+
239
+ <ul>
240
+ <li>Is this a new string value? Or a copy of an existing string? If it’s a copy, Ruby creates a Shared String. This is the fastest option, since Ruby only needs a new RString structure, and not another copy of the existing string data.</li>
241
+ <li>Is this a long string? Or a short string? If the new string value is 23 characters or less, Ruby creates an Embedded String. While not as fast as a Shared String, it’s still fast because the 23 characters are simply copied right into the RString structure and there’s no need to call <span class="code">malloc</span>.</li>
242
+ <li>Finally, for long string values, 24 characters or more, Ruby creates a Heap String - meaning it calls <span class="code">malloc</span> and gets some new memory from the heap, and then copies the string value there. This is the slowest option.</li>
243
+ </ul>
244
+
245
+
246
+ <h2>The actual RString structure</h2>
247
+
248
+ <p>For those of you familiar with the C language, here’s the actual Ruby 1.9 definition of RString:</p>
249
+
250
+ <div class="CodeRay">
251
+ <div class="code"><pre>
252
+ <span class="r">struct</span> RString {
253
+
254
+ <span class="r">struct</span> RBasic basic;
255
+
256
+ <span class="r">union</span> {
257
+ <span class="r">struct</span> {
258
+ <span class="pt">long</span> len;
259
+ <span class="pt">char</span> *ptr;
260
+ <span class="r">union</span> {
261
+ <span class="pt">long</span> capa;
262
+ VALUE shared;
263
+ } aux;
264
+ } heap;
265
+
266
+ <span class="pt">char</span> ary[RSTRING_EMBED_LEN_MAX + <span class="i">1</span>];
267
+ } as;
268
+ };
269
+ </pre></div>
270
+ </div>
271
+
272
+
273
+
274
+ <p>I won’t try to explain all the code details here, but here are a couple important things to learn about Ruby strings from this definition:</p>
275
+
276
+ <ul>
277
+ <li>The <span class="code">RBasic</span> structure keeps track of various important bits of information about this string, such as flags indicating whether it’s shared or embedded, and a pointer to the corresponding Ruby String object structure.</li>
278
+ <li>The <span class="code">capa</span> value keeps track of the “capacity” of each heap string... it turns out Ruby will often allocate more memory than is required for each heap string, again to avoid extra calls to <span class="code">malloc</span> if a string size changes.</li>
279
+ <li>The use of <span class="code">union</span> allows Ruby to EITHER save the len/ptr/capa/shared information OR the actual string data itself.</li>
280
+ <li>The value of <span class="code">RSTRING_EMBED_LEN_MAX</span> was chosen to match the size of the len/ptr/capa values. That’s where the 23 limit comes from.</li>
281
+ </ul>
282
+
283
+
284
+ <p>Here’s the line of code from ruby.h that defines this value:</p>
285
+
286
+ <div class="CodeRay">
287
+ <div class="code"><pre>
288
+ <span class="pp">#define</span> RSTRING_EMBED_LEN_MAX ((<span class="pt">int</span>)((<span class="r">sizeof</span>(VALUE)*<span class="i">3</span>)/<span class="r">sizeof</span>(<span class="pt">char</span>)-<span class="i">1</span>))
289
+ </pre></div>
290
+ </div>
291
+
292
+
293
+
294
+ <p>On a 64 bit machine, sizeof(VALUE) is 8, leading to the limit of 23 characters. This will be smaller for a 32 bit machine.</p>
295
+
296
+ <h2>Benchmarking Ruby string allocation</h2>
297
+
298
+ <p>Let’s try to measure how much faster short strings are vs. long strings in Ruby 1.9.3 &ndash; here’s a simple line of code that dynamically creates a new string by appending a single character onto the end:</p>
299
+
300
+ <div class="CodeRay">
301
+ <div class="code"><pre>
302
+ new_string = str + <span class="s"><span class="dl">'</span><span class="k">x</span><span class="dl">'</span></span>
303
+ </pre></div>
304
+ </div>
305
+
306
+
307
+
308
+ <p>The <span class="code">new_string</span> value will either be a Heap String or an Embedded String, depending on how long the <span class="code">str</span> variable’s value is. The reason I need to use a string concatenation operation, the <span class="code">+ &lsquo;x&rsquo;</span> part, is to force Ruby to allocate a new string dynamically. Otherwise if I just used <span class="code">new_string = str</span>, I would get a Shared String.</p>
309
+
310
+ <p>Now I’ll call this method from a loop and benchmark it:</p>
311
+
312
+ <div class="CodeRay">
313
+ <div class="code"><pre>
314
+ require <span class="s"><span class="dl">'</span><span class="k">benchmark</span><span class="dl">'</span></span>
315
+
316
+ <span class="co">ITERATIONS</span> = <span class="i">1000000</span>
317
+
318
+ <span class="r">def</span> <span class="fu">run</span>(str, bench)
319
+ bench.report(<span class="s"><span class="dl">&quot;</span><span class="il"><span class="idl">#{</span>str.length + <span class="i">1</span><span class="idl">}</span></span><span class="k"> chars</span><span class="dl">&quot;</span></span>) <span class="r">do</span>
320
+ <span class="co">ITERATIONS</span>.times <span class="r">do</span>
321
+ new_string = str + <span class="s"><span class="dl">'</span><span class="k">x</span><span class="dl">'</span></span>
322
+ <span class="r">end</span>
323
+ <span class="r">end</span>
324
+ <span class="r">end</span>
325
+ </pre></div>
326
+ </div>
327
+
328
+
329
+
330
+ <p>Here I’m using the benchmark library to measure how long it takes to call that method 1 million times. Now running this with a variety of different string lengths:</p>
331
+
332
+ <div class="CodeRay">
333
+ <div class="code"><pre>
334
+ <span class="co">Benchmark</span>.bm <span class="r">do</span> |bench|
335
+ run(<span class="s"><span class="dl">&quot;</span><span class="k">12345678901234567890</span><span class="dl">&quot;</span></span>, bench)
336
+ run(<span class="s"><span class="dl">&quot;</span><span class="k">123456789012345678901</span><span class="dl">&quot;</span></span>, bench)
337
+ run(<span class="s"><span class="dl">&quot;</span><span class="k">1234567890123456789012</span><span class="dl">&quot;</span></span>, bench)
338
+ run(<span class="s"><span class="dl">&quot;</span><span class="k">12345678901234567890123</span><span class="dl">&quot;</span></span>, bench)
339
+ run(<span class="s"><span class="dl">&quot;</span><span class="k">123456789012345678901234</span><span class="dl">&quot;</span></span>, bench)
340
+ run(<span class="s"><span class="dl">&quot;</span><span class="k">1234567890123456789012345</span><span class="dl">&quot;</span></span>, bench)
341
+ run(<span class="s"><span class="dl">&quot;</span><span class="k">12345678901234567890123456</span><span class="dl">&quot;</span></span>, bench)
342
+ <span class="r">end</span>
343
+ </pre></div>
344
+ </div>
345
+
346
+
347
+
348
+ <p>We get an interesting result:</p>
349
+
350
+ <div class="CodeRay">
351
+ <div class="code"><pre>
352
+ user system total real
353
+ 21 chars 0.250000 0.000000 0.250000 ( 0.247459)
354
+ 22 chars 0.250000 0.000000 0.250000 ( 0.246954)
355
+ 23 chars 0.250000 0.000000 0.250000 ( 0.248440)
356
+ 24 chars 0.480000 0.000000 0.480000 ( 0.478391)
357
+ 25 chars 0.480000 0.000000 0.480000 ( 0.479662)
358
+ 26 chars 0.480000 0.000000 0.480000 ( 0.481211)
359
+ 27 chars 0.490000 0.000000 0.490000 ( 0.490404)
360
+ </pre></div>
361
+ </div>
362
+
363
+
364
+
365
+ <p>Note that when the string length is 23 or less, it takes about 250ms to create 1 million new strings. But when my string length is 24 or more, it takes around 480ms, almost twice as long!</p>
366
+
367
+ <p>Here’s a graph showing some more data; the bars show how long it takes to allocate 1 million strings of the given length:</p>
368
+
369
+ <p><img src="http://patshaughnessy.net/assets/2012/1/4/string-allocations.png" alt="string allocation chart" /></p>
370
+
371
+ <h2>Conclusion</h2>
372
+
373
+ <p>Don’t worry! I don’t think you should refactor all your code to be sure you have strings of length 23 or less. That would obviously be ridiculous. The speed increase sounds impressive, but actually the time differences I measured were insignificant until I allocated 100,000s or millions of strings &ndash; how many Ruby applications will need to create this many string values? And even if you do need to create many string objects, the pain and confusion caused by using only short strings would overwhelm any performance benefit you might get.</p>
374
+
375
+ <p>For me I really think understanding something about how the Ruby interpreter works is just fun! I enjoyed taking a look through a microscope at these sorts of tiny details. I do also suspect having some understanding of how Matz and his colleagues actually implemented the language will eventually help me to use Ruby in a wiser and more knowledgeable way. We’ll have to see&hellip; stay tuned for some more posts about Ruby internals!</p>
376
+
377
+ </section>
378
+
379
+
380
+ <section class="comments">
381
+ <a href="http://patshaughnessy.disqus.com/?url=ref">View the discussion thread.</a>
382
+ </section>
383
+
384
+ </article>
385
+
386
+ </div>
387
+
388
+ <div class="twocol last" id="right">
389
+ <div id="sidebar">
390
+ <img src="/images/pat.jpg"/>
391
+ <div class="header">Subscribe&nbsp;<a href="http://feeds.feedburner.com/patshaughnessy"><img src="/images/feed-icon16x16.png"/></a></div>
392
+ <div class="links">
393
+ <ul>
394
+ <li>
395
+ <a href="http://feeds.feedburner.com/patshaughnessy"><img src="http://feeds.feedburner.com/~fc/patshaughnessy?bg=99CCFF&amp;fg=444444&amp;anim=0" height="26" width="88" style="border:0" alt="" /></a>
396
+ </li>
397
+ <li>
398
+ <a href="https://twitter.com/pat_shaughnessy" class="twitter-follow-button" data-show-count="false" data-show-screen-name="false">Follow @pat_shaughnessy</a>
399
+ <a href="http://twitter.com/pat_shaughnessy">@pat_shaughnessy</a>
400
+ </li>
401
+ </ul>
402
+ </div>
403
+ <div class="header">Buy my book</div>
404
+ <div class="links">
405
+ <ul>
406
+ <li><a href="http://patshaughnessy.net/ruby-under-a-microscope"><img src="http://patshaughnessy.net/images/cover-ana.png"></a></li>
407
+ <li id="eBook"><a href="http://patshaughnessy.net/ruby-under-a-microscope">Ruby Under a Microscope</a></li>
408
+ </ul>
409
+ </div>
410
+ <div class="header">Popular</div>
411
+ <div class="links">
412
+ <ul>
413
+ <li><a href="http://patshaughnessy.net/2012/1/4/never-create-ruby-strings-longer-than-23-characters">Never create Ruby strings longer than 23 characters</a></li>
414
+ <li><a href="http://patshaughnessy.net/2012/3/23/why-you-should-be-excited-about-garbage-collection-in-ruby-2-0">Why You Should Be Excited About Garbage Collection in Ruby 2.0</a></li>
415
+ <li><a href="http://patshaughnessy.net/2011/10/14/why-bundler-1-1-will-be-much-faster">Why Bundler 1.1 will be much faster</a></li>
416
+ <li><a href="http://patshaughnessy.net/2012/2/29/the-joke-is-on-us-how-ruby-1-9-supports-the-goto-statement">The Joke Is On Us: How Ruby 1.9 Supports the Goto Statement</a></li>
417
+ </ul>
418
+ </div>
419
+ <div class="header">Recent</div>
420
+ <div class="links">
421
+ <ul>
422
+ <li><a href="http://patshaughnessy.net/2013/2/12/an-interview-with-laurent-sansonetti">An Interview With Laurent Sansonetti</a></li>
423
+ <li><a href="http://patshaughnessy.net/2013/2/8/ruby-mri-source-code-idioms-3-embedded-objects">Ruby MRI Source Code Idioms #3: Embedded Objects</a></li>
424
+ <li><a href="http://patshaughnessy.net/2013/1/31/ruby-mri-source-code-idioms-2-c-that-resembles-ruby">Ruby MRI Source Code Idioms #2: C That Resembles Ruby</a></li>
425
+ <li><a href="http://patshaughnessy.net/2013/1/23/ruby-mri-source-code-idioms-1-accessing-data-via-macros">Ruby MRI Source Code Idioms #1: Accessing Data Via Macros</a></li>
426
+ </ul>
427
+ </div>
428
+ <div class="header"><a href="/">More...</a></div>
429
+ </div>
430
+ </div>
431
+
432
+ <div class="row" id="copyright">
433
+ <p>Content and UI design &copy; 2013 Pat Shaughnessy &mdash; powered by <a href="http://cloudhead.io/toto">Toto</a> &mdash; comments powered by <a href="http://disqus.com">Disqus</a></p>
434
+ </div>
435
+ </div>
436
+ </body>
437
+ </html>
@@ -0,0 +1,10 @@
1
+ <!doctype html>
2
+ <html>
3
+ <body>
4
+ <h1>
5
+ Bonjour,
6
+ tout les monde!
7
+ </h1>
8
+ <div>Here is some sample text to keep you busy.</div>
9
+ </body>
10
+ </html>