profile-viewer 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/dist/022efb839d22fa54a716.svg +28 -0
- data/dist/0c510afd6169a0a83f97.svg +14 -0
- data/dist/0d5cf282780cd1a5ec64.svg +8 -0
- data/dist/119.cc58ce313e67f80f50f3.bundle.js +2 -0
- data/dist/119.cc58ce313e67f80f50f3.bundle.js.map +1 -0
- data/dist/11c5dca6d97c9e10e5b7.svg +9 -0
- data/dist/131.c21d348572deab4ece66.bundle.js +2 -0
- data/dist/131.c21d348572deab4ece66.bundle.js.map +1 -0
- data/dist/18e7fdd45099134897d2.svg +8 -0
- data/dist/2d4b477bc424d35a0245.svg +25 -0
- data/dist/2e43ad991eb141fc017f.svg +9 -0
- data/dist/2ea5b50b7361e6de561c.svg +11 -0
- data/dist/308.4d236ce7e6451807cb54.bundle.js +2 -0
- data/dist/308.4d236ce7e6451807cb54.bundle.js.map +1 -0
- data/dist/31fe933f3a12be1aa7f3.svg +4 -0
- data/dist/390aa266f451c1005f61.svg +10 -0
- data/dist/3c3fda7c08bcc9544c64.svg +6 -0
- data/dist/4260d5db8309a6f83637.svg +10 -0
- data/dist/4536fd0738f36c3463bc.svg +1 -0
- data/dist/490065792b7e903c9f3e.svg +6 -0
- data/dist/49da6a2153f62ef73d17.svg +7 -0
- data/dist/4d26f0e38c22eedde178.jpg +0 -0
- data/dist/4ecb077d8715f89c0f87.svg +13 -0
- data/dist/524e896f487119a0b832.svg +13 -0
- data/dist/58c5415e952fb6dddd6b.png +0 -0
- data/dist/6bd0589a27236471fdab.svg +4 -0
- data/dist/6c6b49af3a86dfdd44e6.svg +4 -0
- data/dist/6c8063be3afa1d95c902.svg +3 -0
- data/dist/71730566d6c47ffdc965.svg +12 -0
- data/dist/7273fadce89da05535e5.svg +4 -0
- data/dist/731673c749e57bf6f544.svg +13 -0
- data/dist/76e5b29823c9fd62d90d.svg +3 -0
- data/dist/7853c71223701f30d495.svg +19 -0
- data/dist/79856ce399cb305fafb8.svg +19 -0
- data/dist/79eaad4fc0c398100809.svg +6 -0
- data/dist/7c7ac3c7df370340cfd2.svg +7 -0
- data/dist/7e3ac9afb25cfe809520.svg +1 -0
- data/dist/81762b0b1aacd3686a6b.svg +12 -0
- data/dist/86e81402ef76d28ff55f.svg +15 -0
- data/dist/8aae7b979b04407f71a4.svg +3 -0
- data/dist/8b8b909e42722172d494.svg +7 -0
- data/dist/8dab2a6ba757bcc6e9a5.svg +3 -0
- data/dist/9.071a712ea648c8b30416.bundle.js +2 -0
- data/dist/9.071a712ea648c8b30416.bundle.js.map +1 -0
- data/dist/9103e94f1d34c15d44be.svg +6 -0
- data/dist/914.b9bc213d93173ce6b0cb.bundle.js +2 -0
- data/dist/914.b9bc213d93173ce6b0cb.bundle.js.map +1 -0
- data/dist/9a7bd6ec36312a2baa7e.svg +1 -0
- data/dist/9d858d1a3ab57f8ee2e1.svg +4 -0
- data/dist/9eb1fab2684d1e1f0e26.svg +13 -0
- data/dist/_headers +37 -0
- data/dist/_redirects +2 -0
- data/dist/a3196e840709b18a3119.svg +1 -0
- data/dist/aa867391c311267af5a9.svg +4 -0
- data/dist/acb8393f3fb9c59b15c9.svg +20 -0
- data/dist/ad13da76642d8099fe70.module.wasm +0 -0
- data/dist/b45b29da558efa211628.jpg +0 -0
- data/dist/b5698a02eef37ce29146.svg +10 -0
- data/dist/b805360fcc91834556c9.svg +4 -0
- data/dist/before-load.js +1 -0
- data/dist/c3432220f657733ed05f.svg +10 -0
- data/dist/ca2af827049e9039ef9c.svg +8 -0
- data/dist/contribute.json +37 -0
- data/dist/d09537c705fb0878eb63.svg +4 -0
- data/dist/d9c199b3e3e469cc5713.svg +10 -0
- data/dist/da1f21c60c7217745dd8.svg +1 -0
- data/dist/docs/README.md +7 -0
- data/dist/docs/_navbar.md +3 -0
- data/dist/docs/_sidebar.md +27 -0
- data/dist/docs/advanced-topics.md +9 -0
- data/dist/docs/bunny-2.md +78 -0
- data/dist/docs/bunny.md +281 -0
- data/dist/docs/case-studies.md +5 -0
- data/dist/docs/css/style-overrides.css +160 -0
- data/dist/docs/css/vue_v4.12.2.min.css +858 -0
- data/dist/docs/gitpod.md +39 -0
- data/dist/docs/guide-android-profiling.md +46 -0
- data/dist/docs/guide-filtering-call-trees.md +87 -0
- data/dist/docs/guide-getting-started.md +115 -0
- data/dist/docs/guide-perf-profiling.md +76 -0
- data/dist/docs/guide-profiler-fundamentals.md +33 -0
- data/dist/docs/guide-profiling-android-directly-on-device.md +34 -0
- data/dist/docs/guide-profiling-firefox-android.md +7 -0
- data/dist/docs/guide-remote-profiling.md +90 -0
- data/dist/docs/guide-removing-profiler.md +4 -0
- data/dist/docs/guide-stack-samples-and-call-trees.md +57 -0
- data/dist/docs/guide-startup-shutdown.md +108 -0
- data/dist/docs/guide-ui-tour-panels.md +95 -0
- data/dist/docs/guide-ui-tour-timeline.md +76 -0
- data/dist/docs/images/about-debugging-remote-profiling-panel.png +0 -0
- data/dist/docs/images/about-debugging-remote.png +0 -0
- data/dist/docs/images/about-url.png +0 -0
- data/dist/docs/images/allocation-calltree-2019-12-11.png +0 -0
- data/dist/docs/images/allocation-feature.png +0 -0
- data/dist/docs/images/allocation-flame-graph-2019-12-11.png +0 -0
- data/dist/docs/images/allocation-js.png +0 -0
- data/dist/docs/images/allocation-track.png +0 -0
- data/dist/docs/images/bunny-analysis/bunny.png +0 -0
- data/dist/docs/images/bunny-analysis/clone-flame-content.png +0 -0
- data/dist/docs/images/bunny-analysis/clone-flame-worker.png +0 -0
- data/dist/docs/images/bunny-analysis/clone-thread-list.png +0 -0
- data/dist/docs/images/bunny-analysis/fillstyle-thread-list-measure.png +0 -0
- data/dist/docs/images/bunny-analysis/fillstyle-thread-list.png +0 -0
- data/dist/docs/images/bunny-analysis/flame-graph-content.png +0 -0
- data/dist/docs/images/bunny-analysis/flame-graph-set-fillstyle.png +0 -0
- data/dist/docs/images/bunny-analysis/flame-graph-worker.png +0 -0
- data/dist/docs/images/bunny-analysis/focus-subtree.png +0 -0
- data/dist/docs/images/bunny-analysis/threads-list-measure.png +0 -0
- data/dist/docs/images/bunny-analysis/threads-list.png +0 -0
- data/dist/docs/images/bunny-analysis/threads-parallel.png +0 -0
- data/dist/docs/images/bunny-analysis/threads-sync.png +0 -0
- data/dist/docs/images/bunny-analysis/threads-work-parallel.png +0 -0
- data/dist/docs/images/bunny-analysis/threads-work-sync.png +0 -0
- data/dist/docs/images/call-tree-running-time.svg +82 -0
- data/dist/docs/images/call-tree-self-time.svg +82 -0
- data/dist/docs/images/call-tree.svg +131 -0
- data/dist/docs/images/favicon.svg +4 -0
- data/dist/docs/images/filter-search.svg +78 -0
- data/dist/docs/images/getting-started-delete-profile.png +0 -0
- data/dist/docs/images/getting-started-devtools-panel.png +0 -0
- data/dist/docs/images/getting-started-enable-popup.png +0 -0
- data/dist/docs/images/getting-started-list-uploaded-profiles.png +0 -0
- data/dist/docs/images/getting-started-naming-profiles.png +0 -0
- data/dist/docs/images/getting-started-popup.png +0 -0
- data/dist/docs/images/getting-started-upload-permalink.webm +0 -0
- data/dist/docs/images/getting-started-upload.png +0 -0
- data/dist/docs/images/getting-started-use-icon.webm +0 -0
- data/dist/docs/images/getting-started-use-popup.webm +0 -0
- data/dist/docs/images/implementation-2022-06-16.png +0 -0
- data/dist/docs/images/implementation-filter.svg +101 -0
- data/dist/docs/images/interval-2020-05.png +0 -0
- data/dist/docs/images/invert-2022-06-16.png +0 -0
- data/dist/docs/images/invert-after.svg +161 -0
- data/dist/docs/images/invert-before.svg +144 -0
- data/dist/docs/images/invert-call-tree.svg +113 -0
- data/dist/docs/images/ipc-messages-feature.png +0 -0
- data/dist/docs/images/ipc-messages-io-threads.png +0 -0
- data/dist/docs/images/ipc-messages-popup.png +0 -0
- data/dist/docs/images/popup.png +0 -0
- data/dist/docs/images/qr-gve-nightly.gif +0 -0
- data/dist/docs/images/qr-reference-browser-nightly.gif +0 -0
- data/dist/docs/images/samples.svg +116 -0
- data/dist/docs/images/screenshot-2022-04-25.png +0 -0
- data/dist/docs/images/search-2022-06-16.png +0 -0
- data/dist/docs/images/secret-menu-toast.png +0 -0
- data/dist/docs/images/settings-menu.png +0 -0
- data/dist/docs/images/simple-call-tree-self-time.svg +41 -0
- data/dist/docs/images/simple-call-tree.svg +41 -0
- data/dist/docs/images/simple-stacks-self-time.svg +96 -0
- data/dist/docs/images/simple-stacks.svg +118 -0
- data/dist/docs/images/start-profiler.png +0 -0
- data/dist/docs/images/transform-collapse-direct-recursion.svg +47 -0
- data/dist/docs/images/transform-collapse-resource.svg +83 -0
- data/dist/docs/images/transform-focus-function.svg +81 -0
- data/dist/docs/images/transform-focus-node.svg +86 -0
- data/dist/docs/images/transform-merge-function.svg +95 -0
- data/dist/docs/images/transform-merge-node.svg +96 -0
- data/dist/docs/images/transforms-2022-06-16.png +0 -0
- data/dist/docs/images/ui-tour-activity-graph.png +0 -0
- data/dist/docs/images/ui-tour-panels-call-tree.png +0 -0
- data/dist/docs/images/ui-tour-panels-flame-graph.png +0 -0
- data/dist/docs/images/ui-tour-panels-marker-chart.png +0 -0
- data/dist/docs/images/ui-tour-panels-network-chart.png +0 -0
- data/dist/docs/images/ui-tour-panels-stack-chart.png +0 -0
- data/dist/docs/images/ui-tour-panels.png +0 -0
- data/dist/docs/images/ui-tour-ranges.png +0 -0
- data/dist/docs/images/ui-tour-selection.webm +0 -0
- data/dist/docs/images/ui-tour-timeline-markers.png +0 -0
- data/dist/docs/images/ui-tour-timeline-memory.png +0 -0
- data/dist/docs/images/ui-tour-timeline-network.png +0 -0
- data/dist/docs/images/ui-tour-timeline-screenshots.png +0 -0
- data/dist/docs/images/ui-tour-timeline-track-selection.png +0 -0
- data/dist/docs/images/ui-tour-timeline.png +0 -0
- data/dist/docs/index.html +21 -0
- data/dist/docs/ipc-messages.md +44 -0
- data/dist/docs/js/docsify_v4.12.2+.min.js +1 -0
- data/dist/docs/js/ga_v4.12.2.min.js +1 -0
- data/dist/docs/js/init.js +1 -0
- data/dist/docs/js/search_v4.12.2.min.js +1 -0
- data/dist/docs/memory-allocations.md +70 -0
- data/dist/docs/videos-call-tree-1.md +5 -0
- data/dist/docs/videos-call-tree-2.md +5 -0
- data/dist/docs/videos-call-tree-3.md +5 -0
- data/dist/docs/videos-intro.md +7 -0
- data/dist/docs/videos-samples-markers.md +5 -0
- data/dist/docs/videos-threads.md +5 -0
- data/dist/docs/videos.md +32 -0
- data/dist/e4ed50222911c5af9a32.svg +12 -0
- data/dist/e70722c0fe0ac3d4227b.svg +10 -0
- data/dist/f0599659345cf76717cd.svg +4 -0
- data/dist/f8e25c2ebeb0a0725a9e.svg +12 -0
- data/dist/favicon.png +0 -0
- data/dist/fcb532a05dd4b09c2d08.svg +10 -0
- data/dist/fd040fb5f4e7a515bb3c.svg +15 -0
- data/dist/index.html +1 -0
- data/dist/locales/README.md +26 -0
- data/dist/locales/be/app.ftl +1003 -0
- data/dist/locales/de/app.ftl +994 -0
- data/dist/locales/el/app.ftl +1013 -0
- data/dist/locales/en-GB/app.ftl +1018 -0
- data/dist/locales/en-US/app.ftl +1125 -0
- data/dist/locales/es-CL/app.ftl +948 -0
- data/dist/locales/fr/app.ftl +942 -0
- data/dist/locales/fy-NL/app.ftl +1018 -0
- data/dist/locales/ia/app.ftl +1007 -0
- data/dist/locales/it/app.ftl +936 -0
- data/dist/locales/kab/app.ftl +557 -0
- data/dist/locales/nl/app.ftl +1018 -0
- data/dist/locales/pt-BR/app.ftl +947 -0
- data/dist/locales/ru/app.ftl +1032 -0
- data/dist/locales/sv-SE/app.ftl +1013 -0
- data/dist/locales/uk/app.ftl +1019 -0
- data/dist/locales/zh-CN/app.ftl +931 -0
- data/dist/locales/zh-TW/app.ftl +930 -0
- data/dist/main.8208fda2d35ddbe38d55.bundle.js +199 -0
- data/dist/main.8208fda2d35ddbe38d55.bundle.js.LICENSE.txt +92 -0
- data/dist/main.8208fda2d35ddbe38d55.bundle.js.map +1 -0
- data/dist/photon/31fe933f3a12be1aa7f3.svg +4 -0
- data/dist/photon/49da6a2153f62ef73d17.svg +7 -0
- data/dist/photon/6bd0589a27236471fdab.svg +4 -0
- data/dist/photon/6c8063be3afa1d95c902.svg +3 -0
- data/dist/photon/76e5b29823c9fd62d90d.svg +3 -0
- data/dist/photon/8aae7b979b04407f71a4.svg +3 -0
- data/dist/photon/8dab2a6ba757bcc6e9a5.svg +3 -0
- data/dist/photon/9103e94f1d34c15d44be.svg +6 -0
- data/dist/photon/aa867391c311267af5a9.svg +4 -0
- data/dist/photon/f0599659345cf76717cd.svg +4 -0
- data/dist/photon/index.html +214 -0
- data/dist/photon/main.8c8260452e7439ec6df9.bundle.js +2 -0
- data/dist/photon/main.8c8260452e7439ec6df9.bundle.js.map +1 -0
- data/dist/service-worker-compat.js +1 -0
- data/dist/sw.js +2 -0
- data/dist/sw.js.map +1 -0
- data/dist/workbox-27b29e6f.js +2 -0
- data/dist/workbox-27b29e6f.js.map +1 -0
- data/dist/zee-worker.js +1 -0
- data/ruby-bin/profile-viewer +87 -0
- metadata +281 -0
data/dist/docs/bunny.md
ADDED
@@ -0,0 +1,281 @@
|
|
1
|
+
# Case Study
|
2
|
+
## 2D canvas and worker messaging
|
3
|
+
|
4
|
+
The following article is a case study in using the profiler to identify performance issues. The fixes made the code run four times faster and changed the frame rate from a fairly slow 15fps to a smooth 60fps. The process for this analysis follows a common pattern:
|
5
|
+
|
6
|
+
* Profile the code
|
7
|
+
* Identify slow areas
|
8
|
+
* Form a hypothesis as to why it's slow
|
9
|
+
* Act upon the hypothesis and change the code
|
10
|
+
* Profile the code to measure the difference
|
11
|
+
* Evaluate the effectiveness of the code change
|
12
|
+
|
13
|
+
## The project description
|
14
|
+
|
15
|
+
![A picture of a 3d bunny rabbit model rendered using small squares.](./images/bunny-analysis/bunny.png)
|
16
|
+
|
17
|
+
The project is a website that takes a user's JavaScript code and runs it to produce a visualization. The user's code only has access to the function `rect(color, x, y, width, height)`, which draws a rectangle to the screen. For the implementation, the website posts the user's code to a sandboxed iframe. The iframe has a `<canvas>` element that is rendered to via the [`CanvasRenderingContext2D`](https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D) API. The user's code is evaluated in a WebWorker and then the results of `rect()` are posted back to the iframe's code.
|
18
|
+
|
19
|
+
In a simplified example, the `worker.js` would run something like:
|
20
|
+
|
21
|
+
```js
|
22
|
+
// Evaluate the user's code string:
|
23
|
+
eval(`
|
24
|
+
rect('#fff', 10, 10, 3, 3);
|
25
|
+
rect('#fff', 20, 20, 3, 3);
|
26
|
+
rect('#fff', 30, 30, 3, 3);
|
27
|
+
rect('#fff', 40, 40, 3, 3);
|
28
|
+
rect('#fff', 50, 50, 3, 3);
|
29
|
+
`);
|
30
|
+
|
31
|
+
// worker.js would post something like this to the iframe:
|
32
|
+
self.postMessage({
|
33
|
+
color: ['#fff', '#fff', '#fff', '#fff', '#fff'],
|
34
|
+
x: [10, 20, 30, 40, 50],
|
35
|
+
y: [10, 20, 30, 40, 50],
|
36
|
+
w: [3, 3, 3, 3, 3],
|
37
|
+
h: [3, 3, 3, 3, 3],
|
38
|
+
});
|
39
|
+
```
|
40
|
+
|
41
|
+
Then the iframe would draw the code using:
|
42
|
+
|
43
|
+
```js
|
44
|
+
worker.addEventListener('message', message => {
|
45
|
+
const { data } = message;
|
46
|
+
for (let i = 0; i < data.color.length; i++) {
|
47
|
+
ctx.fillStyle = data.color[i];
|
48
|
+
ctx.fillRect(data.x[i], data.y[i], data.w[i], data.h[i]);
|
49
|
+
}
|
50
|
+
});
|
51
|
+
```
|
52
|
+
|
53
|
+
This would result in the user's evaluated code drawing something to the screen, like the bunny model in the above image.
|
54
|
+
|
55
|
+
## The problem
|
56
|
+
|
57
|
+
Baseline profile: https://perfht.ml/2IxTwqi
|
58
|
+
|
59
|
+
This code ended up not scaling well for large sets of rectangles being drawn to the screen. There were lots of stutters and a slow frame rate. The user impact of fixing this problem would be a smoother frame rate, plus the ability to draw many more rectangles to the screen without slowing things down. In order to validate fixes, the following steps were used to reproduce the issue.
|
60
|
+
|
61
|
+
* Load the page with the bunny visualization.
|
62
|
+
* Hit Ctrl Shift 1 to turn on the Gecko Profiler.
|
63
|
+
* Wait around 5 seconds.
|
64
|
+
* Hit Ctrl Shift 2 to capture the profile.
|
65
|
+
* Set the range to 3.0 seconds of relatively stable frames that don't have stutters or GC pauses.
|
66
|
+
* Hide idle stacks by right clicking `__psync_cvwait` and `mach_msg_trap` in the Flame Graph, and choosing **"Drop samples with this function"**.
|
67
|
+
* Filter the threads to:
|
68
|
+
* The relevant content process
|
69
|
+
* The relevant DOM Worker
|
70
|
+
* The compositor
|
71
|
+
|
72
|
+
### Getting oriented
|
73
|
+
|
74
|
+
A nice place to start out is getting oriented with the thread stack graphs. These are in the headers and show the height of the stacks for the sample code.
|
75
|
+
|
76
|
+
Keep in mind that a higher stack doesn't mean the code took longer to execute. It only means that the stack height was taller, a rather arbitrary measure that is only useful for orienting oneself in the profile. Time is the X axis in this graph. There are gaps between the stacks. These gaps are the idle stacks that were hidden in the steps to reproduce.
|
77
|
+
|
78
|
+
![A view of the threads list with "Content", "DOM Worker", and "Compositor". It shows the interchange of time spent communicating back and forth.](./images/bunny-analysis/threads-list.png)
|
79
|
+
|
80
|
+
The time between frames can be measured using a range selection. The brown marks represent a `RefreshDriverTick`, which shows when the browser's image on the screen was refreshed. This will be a useful metric to describe smooth animation.
|
81
|
+
|
82
|
+
![A range selection of the above thread list showing 66ms.](./images/bunny-analysis/threads-list-measure.png)
|
83
|
+
|
84
|
+
The timing here is typically between 60-70ms. This is about 15 frames per second (fps), which is really too slow. Visualizations should take around 16ms per frame for the smooth 60fps visual experience.
|
85
|
+
|
86
|
+
The thread list also nicely shows the message passing between the content process' main thread and the worker thread. The content process posts a message and then effectively waits for a response before it does anything. This is a fairly common pattern to see in multi-threaded code.
|
87
|
+
|
88
|
+
### Problems in the content process' main thread
|
89
|
+
|
90
|
+
The Flame Graph provides a nice view into a summary of where time is spent. The X axis represents the percentage of time spent with that function in that stack for all visible stacks. In a previous step, the idle time was already hidden from the analysis.
|
91
|
+
|
92
|
+
The stacks are rather deep here, so a nice first step is to focus on only the subtree that is interesting. Visually, `nsThread::ProcessNextEvent` is the last function most common to the tree. Right click and focus on that subtree.
|
93
|
+
|
94
|
+
![A screenshot of the context menu on the flame graph for focus subtree.](./images/bunny-analysis/focus-subtree.png)
|
95
|
+
|
96
|
+
Two functions really stand out as taking a lot of time. `JSStructuredCloneReader::read` takes almost 30% of the time. It is a C++ function and is called when the iframe receives a message from the worker. It safely reads a clone of the data and provides it to the iframe's JavaScript code.
|
97
|
+
|
98
|
+
The bigger culprit is `drawRects`, which takes 60% of the time. This is the function that calls out to the `CanvasRenderingContext2D` API to actually draw to the screen. There are two functions that are being called from `drawRects`. These are `set CanvasRenderingContext2D.fillStyle` and `CanvasRenderingContext2D.fillRect`.
|
99
|
+
|
100
|
+
https://perfht.ml/2Ios9PH
|
101
|
+
|
102
|
+
![Screenshot of the flame graph of the content process.](./images/bunny-analysis/flame-graph-content.png)
|
103
|
+
|
104
|
+
### Problems in the worker
|
105
|
+
|
106
|
+
Looking at the worker thread, first focus on the subtree of `(root scope) https://glittr-sandbox:4444/squares/worker.js`, as this contains the relevant code for the analysis.
|
107
|
+
|
108
|
+
https://perfht.ml/2Iu4mh4
|
109
|
+
|
110
|
+
![Screenshot of the flame graph of the worker process.](./images/bunny-analysis/flame-graph-worker.png)
|
111
|
+
|
112
|
+
There are two main functions that really stand out as taking up a large amount of time. The first is the `drawLineFromPoints`. This happens to be the user's code that is evaluated. Most of it we don't have control over. The `rect` function shows up, but it is a small part of the total time. `DedicatedWorkerGlobalScope.postMessage` and `JSStructuredCloneWriter::write` show up as taking most of the time. This is the part of the code where the worker is posting a message back to the iframe's JavaScript.
|
113
|
+
|
114
|
+
## Hypothesis
|
115
|
+
|
116
|
+
Based on this baseline report, the two areas that seem to be problematic are the `fillStyle` and the structured clone behavior with posting messages. Fixing these will significantly speed up the frame rate.
|
117
|
+
|
118
|
+
## Fixing `set fillStyle`
|
119
|
+
|
120
|
+
The repeated calls to `set fillStyle` are unnecessary for the bunny, as there are only 2 colors being drawn. The first is the grey background, then the second color is the white for the numerous rectangles being drawn to the screen. There is no reason to constantly re-evaluate the color. In fact, this could be a potential fix for the browser rather than this particular website.
|
121
|
+
|
122
|
+
### The code change for `set fillStyle`
|
123
|
+
|
124
|
+
The fix for this would be to only set the color when it has changed.
|
125
|
+
|
126
|
+
```js
|
127
|
+
worker.addEventListener('message', message => {
|
128
|
+
const { data } = message;
|
129
|
+
for (let i = 0; i < data.color.length; i++) {
|
130
|
+
const nextColor = data.color[i]
|
131
|
+
if (prevColor !== nextColor) {
|
132
|
+
// Only update the color if it's changed.
|
133
|
+
ctx.fillStyle = nextColor;
|
134
|
+
prevColor = nextColor;
|
135
|
+
}
|
136
|
+
ctx.fillRect(data.x[i], data.y[i], data.w[i], data.h[i]);
|
137
|
+
}
|
138
|
+
});
|
139
|
+
```
|
140
|
+
|
141
|
+
### The resulting profile:
|
142
|
+
|
143
|
+
Strictly following the steps to reproduce from above produces the following profile:
|
144
|
+
|
145
|
+
https://perfht.ml/2IlI15x
|
146
|
+
|
147
|
+
The header shows that much less time is being spent drawing to the screen. The time before was ~65ms per frame, while now it is ~40ms. In terms of frame-rate, this is a jump from 15fps to 25fps. The change is 1.6 times faster.
|
148
|
+
|
149
|
+
![Screenshot of the thread list and the faster times.](./images/bunny-analysis/fillstyle-thread-list-measure.png)
|
150
|
+
|
151
|
+
The flame graph shows the summary of where time was spent. The overall length of the x axis will not change when there is less time being spent, but it can still show the magnitude of the difference. First off, on `drawRects`, there is no visible sample of `fillStyle`. This shows that the fix is working.
|
152
|
+
|
153
|
+
![Screenshot of the flame graph of the content process after applying the fix.](./images/bunny-analysis/flame-graph-set-fillstyle.png)
|
154
|
+
|
155
|
+
The flame graph can still provide the information for the magnitude of the change. The way to do this is to look at the `(root)` function on the stack. The idle stacks were dropped in the steps to reproduce, so the only remaining samples are ones where (presumably) work was being done. `(root)` has a running time of 2107ms before, while it has a running time of 1613ms after. This is a difference of 1.3 times. However, the range selection is a bit fuzzy, so the FPS is probably a better indicator for this analysis, and the actual result that is visible to the end-user. It's always important to be optimized for the perceived performance characteristics.
|
156
|
+
|
157
|
+
## Fixing structured cloning
|
158
|
+
|
159
|
+
The larger chunk of work, and probably harder to optimize, is the [structured clone](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm). The definition of this algorithm is available on MDN and states:
|
160
|
+
|
161
|
+
> The structured clone algorithm is an algorithm defined by the HTML5 specification for copying complex JavaScript objects. It is used internally when transferring data to and from Workers via postMessage() or when storing objects with IndexedDB. It builds up a clone by recursing through the input object while maintaining a map of previously visited references in order to avoid infinitely traversing cycles.
|
162
|
+
|
163
|
+
This sounds like a lot of work, so is there any easier way to transfer this data and make it more dense? Right now the structure looks like this:
|
164
|
+
|
165
|
+
```js
|
166
|
+
// worker.js would post something like this to the iframe:
|
167
|
+
self.postMessage({
|
168
|
+
color: ['#fff', '#fff', '#fff', '#fff', '#fff'],
|
169
|
+
x: [10, 20, 30, 40, 50],
|
170
|
+
y: [10, 20, 30, 40, 50],
|
171
|
+
w: [3, 3, 3, 3, 3],
|
172
|
+
h: [3, 3, 3, 3, 3],
|
173
|
+
});
|
174
|
+
```
|
175
|
+
|
176
|
+
Already the structure is optimized to not have lots of little objects, making it more GC-friendly. Perhaps it can be made more dense. The structured cloning algorithm has to take into account many of the complexities of JavaScript arrays. The entire array needs to be traversed in order to be copied and each item needs to be considered. We know that the array at `x` only contains numbers, but the JS engine does not.
|
177
|
+
|
178
|
+
Perhaps it would be better to send over typed arrays, that better match the data that will be sent over. A typed array is probably much simpler in its internal representation for cloning.
|
179
|
+
|
180
|
+
Another thing is that duplicating the strings over and over could get expensive and unnecessarily bloat the code. It would be better to store a table of strings and use an array that stores indexes into that table.
|
181
|
+
|
182
|
+
## The code
|
183
|
+
|
184
|
+
The code may be a little bit verbose for this analysis, so it may be enough to only skim it.
|
185
|
+
|
186
|
+
The first trick is to provide a growable array, that is backed by a typed array, but allows for arbitrarily pushing on new data. This is similar to how Rust's `Vec` type works.
|
187
|
+
|
188
|
+
```js
|
189
|
+
class GrowableArray {
|
190
|
+
constructor(dataType, capacity) {
|
191
|
+
this.dataType = dataType;
|
192
|
+
this.length = 0;
|
193
|
+
this.capacity = capacity;
|
194
|
+
this._array = new dataType(capacity);
|
195
|
+
}
|
196
|
+
|
197
|
+
push(number) {
|
198
|
+
if (this.length === this.capacity) {
|
199
|
+
this.capacity *= 2;
|
200
|
+
const newArray = new this.dataType(this.capacity);
|
201
|
+
for (let i = 0; i < this._array.length; i++) {
|
202
|
+
// Copy over the values.
|
203
|
+
newArray[i] = this._array[i];
|
204
|
+
}
|
205
|
+
this._array = newArray;
|
206
|
+
}
|
207
|
+
this._array[this.length] = number;
|
208
|
+
this.length++;
|
209
|
+
}
|
210
|
+
|
211
|
+
reset() {
|
212
|
+
this.length = 0;
|
213
|
+
}
|
214
|
+
}
|
215
|
+
```
|
216
|
+
|
217
|
+
This is then used by:
|
218
|
+
|
219
|
+
```js
|
220
|
+
// Create a Uint16Array with an initial capacity of 512;
|
221
|
+
const array = new GrowableArray(Uint16Array, 512);
|
222
|
+
array.push(1);
|
223
|
+
array.push(3);
|
224
|
+
array.push(5);
|
225
|
+
|
226
|
+
console.log(array._array);
|
227
|
+
// > Uint16Array(512) [1, 3, 5, 0, 0, 0, 0, ... ]
|
228
|
+
console.log(array.length);
|
229
|
+
// > 3
|
230
|
+
```
|
231
|
+
|
232
|
+
Finally, when posting the message, the code would send over the bare typed arrays.
|
233
|
+
|
234
|
+
```js
|
235
|
+
self.postMessage({
|
236
|
+
stringTable,
|
237
|
+
color: colorArray._array,
|
238
|
+
x: xArray._array,
|
239
|
+
y: yArray._array,
|
240
|
+
h: hArray._array,
|
241
|
+
w: wArray._array,
|
242
|
+
length: colorArray.length,
|
243
|
+
})
|
244
|
+
```
|
245
|
+
<!--alex ignore simple-->
|
246
|
+
This makes the code much more complex and hard to maintain, but it could be the key to better performance. This is a common trade-off between fast code and simple code. It's important that any additional complexities are backed by an analysis showing that they actually affect user-perceived performance.
|
247
|
+
|
248
|
+
### The resulting structured clone profile:
|
249
|
+
|
250
|
+
Right away the visual look of the thread list is much more dense. There are many more frames being rendered to the screen.
|
251
|
+
|
252
|
+
https://perfht.ml/2Ir30DT
|
253
|
+
|
254
|
+
![A screenshot of the thread list with the new code changes](./images/bunny-analysis/clone-thread-list.png)
|
255
|
+
|
256
|
+
Zooming in now to see the timing, each frame is within the budget of 16ms per-frame. The script is now running at 60fps.
|
257
|
+
|
258
|
+
Looking at the content process now, the time is primarily being taken up in the `fillRect` call. Structured cloning doesn't really even show up. There are a few samples when filtering for it, but the time spent there is mostly negligible.
|
259
|
+
|
260
|
+
![A screenshot of the flame graph of the content process' main thread](./images/bunny-analysis/clone-flame-content.png)
|
261
|
+
|
262
|
+
The structured cloning is also gone from the worker process. Now it is mostly the user's evaluated code, which we do not have control over.
|
263
|
+
|
264
|
+
![A screenshot of the flame graph of the worker](./images/bunny-analysis/clone-flame-worker.png)
|
265
|
+
|
266
|
+
## Conclusion
|
267
|
+
|
268
|
+
Profiling the code revealed a quick fix to setting `fillStyle`. These code changes didn't really increase complexity of the codebase but had a sizable user impact. This was a case where caching saved the cost of re-computing a value.
|
269
|
+
|
270
|
+
The structured cloning code was a more complicated problem to solve. The solution ended up increasing the complexity of the code, but is justified by the fairly dramatic end-user benefit. The solution was figured out through thinking about the algorithmic complexity of the structured cloning algorithm, and figuring out a way to fit the constraints of the project's data into a faster data structure.
|
271
|
+
|
272
|
+
In the analysis, only the functions that were taking the most time were considered for optimization. This helps to prioritize impactful work and mitigate the dangers of introducing unneeded complexity to the codebase.
|
273
|
+
|
274
|
+
A good follow-up would be to do more analysis on a variety of different test cases to ensure that these changes didn't regress performance on a different example.
|
275
|
+
|
276
|
+
| Metric | Baseline | Fix 1 | Fix 2 | Magnitude Change |
|
277
|
+
| --- | --- | --- | --- | --- |
|
278
|
+
| Time per frame | ~65ms | ~40ms | ~16ms | 4x (faster) |
|
279
|
+
| Frames per second | ~15fps | ~25fps | ~60fps | 4x (faster) |
|
280
|
+
| non-idle time in `(root)` on the content process | 2107ms | 1613ms | 725ms | 2.9x (faster) |
|
281
|
+
| non-idle time in `(root)` on the worker | 666ms | 814ms | 725ms | 0.9x (slower) |
|
@@ -0,0 +1,160 @@
|
|
1
|
+
/* stylelint-disable */
|
2
|
+
|
3
|
+
body {
|
4
|
+
font-family: sans-serif;
|
5
|
+
color: #0c0c0d;
|
6
|
+
}
|
7
|
+
|
8
|
+
.sidebar {
|
9
|
+
background-color: #f9f9fa;
|
10
|
+
}
|
11
|
+
|
12
|
+
/* The general sidebar link color. */
|
13
|
+
.sidebar ul li a {
|
14
|
+
color: #0c0c0d;
|
15
|
+
}
|
16
|
+
|
17
|
+
/**
|
18
|
+
* Add an underline to the current selected article by adding another pseudo-element. The
|
19
|
+
* border property is not enough, as there is margin on the list elements.
|
20
|
+
*/
|
21
|
+
.sidebar ul li::before {
|
22
|
+
/* This removes the "-" for the titles coming from the page inserted in the
|
23
|
+
* sidebar. Indeed they go in the way of the underline below and aren't
|
24
|
+
* especially nice either.
|
25
|
+
*/
|
26
|
+
content: "";
|
27
|
+
}
|
28
|
+
|
29
|
+
/* Note that ::before is important here. Indeed we need to position the element
|
30
|
+
* using margin-top, which gives the position relatively to the top of the
|
31
|
+
* element. Note that the position context isn't the <li> but the full sidebar,
|
32
|
+
* so that it extends towards the left of the sidebar.*/
|
33
|
+
.sidebar ul li.active::before {
|
34
|
+
content: "";
|
35
|
+
position: absolute;
|
36
|
+
left: 0;
|
37
|
+
width: 100%;
|
38
|
+
height: 3px;
|
39
|
+
background: #0a84ff;
|
40
|
+
z-index: 0;
|
41
|
+
margin-top: 30px;
|
42
|
+
}
|
43
|
+
|
44
|
+
/* The active sidebar link. */
|
45
|
+
.sidebar ul li.active > a {
|
46
|
+
border-right: none;
|
47
|
+
border-bottom: 2px;
|
48
|
+
font-weight: normal;
|
49
|
+
color: #003eaa;
|
50
|
+
}
|
51
|
+
|
52
|
+
/* The first level of links in the sidebar. */
|
53
|
+
.sidebar-nav > ul > li > a {
|
54
|
+
margin-top: 25px;
|
55
|
+
font-size: 18px;
|
56
|
+
font-weight: bold !important; /* Override the .active class */
|
57
|
+
}
|
58
|
+
|
59
|
+
/* Modify the profiler.firefox.com title in the sidebar. */
|
60
|
+
.sidebar > h1 {
|
61
|
+
margin-left: 15px;
|
62
|
+
margin-bottom: 30px;
|
63
|
+
text-align: left;
|
64
|
+
font-weight: bold;
|
65
|
+
}
|
66
|
+
|
67
|
+
.sidebar > h1 a {
|
68
|
+
color: #0c0c0d;
|
69
|
+
}
|
70
|
+
|
71
|
+
/* Add the favicon image. */
|
72
|
+
.sidebar > h1::before {
|
73
|
+
content: "";
|
74
|
+
position: relative;
|
75
|
+
top: 7px;
|
76
|
+
width: 30px;
|
77
|
+
height: 30px;
|
78
|
+
display: inline-block;
|
79
|
+
margin-right: 12px;
|
80
|
+
background-image: url(../images/favicon.svg);
|
81
|
+
background-position: 0;
|
82
|
+
background-size: 100%;
|
83
|
+
}
|
84
|
+
|
85
|
+
/* Make it so that YouTube videos are responsive and full width. */
|
86
|
+
.youtube {
|
87
|
+
position: relative;
|
88
|
+
width: 100%;
|
89
|
+
height: 0;
|
90
|
+
padding-bottom: 56.66%;
|
91
|
+
}
|
92
|
+
|
93
|
+
.youtube iframe {
|
94
|
+
position: absolute;
|
95
|
+
width: 100%;
|
96
|
+
height: 100%;
|
97
|
+
}
|
98
|
+
|
99
|
+
/* Makes it easier to read on big screens */
|
100
|
+
.markdown-section {
|
101
|
+
max-width: 800px;
|
102
|
+
}
|
103
|
+
|
104
|
+
/* The markdown tables felt a little too tight. */
|
105
|
+
.markdown-section td,
|
106
|
+
.markdown-section th {
|
107
|
+
padding: 8px 15px;
|
108
|
+
line-height: 1.3;
|
109
|
+
}
|
110
|
+
|
111
|
+
/* Make the inline code blocks the same style as GitHub */
|
112
|
+
.markdown-section code,
|
113
|
+
.markdown-section pre {
|
114
|
+
color: #0c0c0d;
|
115
|
+
border-radius: 3px;
|
116
|
+
}
|
117
|
+
|
118
|
+
/* Code blocks in headers are sized weird. Use em rather than rem to make the sizing
|
119
|
+
* relative to the context. */
|
120
|
+
.markdown-section code {
|
121
|
+
/* Override: font-size: 0.8rem; */
|
122
|
+
font-size: 0.8em;
|
123
|
+
}
|
124
|
+
|
125
|
+
/**
|
126
|
+
* The emphasis fails accessibility guidelines.
|
127
|
+
*/
|
128
|
+
.markdown-section em {
|
129
|
+
/* Use the same color as the surrounding text. */
|
130
|
+
color: rgb(44, 62, 80);
|
131
|
+
font-weight: bold;
|
132
|
+
}
|
133
|
+
|
134
|
+
@media screen and (max-width:768px) {
|
135
|
+
/* Make the sidebar toggle not block the text, and be at the top of the screen */
|
136
|
+
.sidebar-toggle {
|
137
|
+
top: 0;
|
138
|
+
bottom: auto;
|
139
|
+
padding: 17px;
|
140
|
+
background-color: rgba(255, 255, 255, 0.8);
|
141
|
+
}
|
142
|
+
|
143
|
+
body.close .sidebar-toggle {
|
144
|
+
padding: 17px;
|
145
|
+
background-color: transparent;
|
146
|
+
}
|
147
|
+
|
148
|
+
/* The search bar needs to accommodate the sidebar toggle */
|
149
|
+
.search > .input-wrap {
|
150
|
+
margin-left: 40px;
|
151
|
+
}
|
152
|
+
|
153
|
+
.app-nav li {
|
154
|
+
margin: 0 0.25rem;
|
155
|
+
}
|
156
|
+
|
157
|
+
.app-nav a {
|
158
|
+
font-size: 12px;
|
159
|
+
}
|
160
|
+
}
|