extzstd 0.0.3.CONCEPT → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +39 -0
  3. data/LICENSE +6 -6
  4. data/README.md +26 -45
  5. data/contrib/zstd/CHANGELOG +555 -0
  6. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  7. data/contrib/zstd/CONTRIBUTING.md +392 -0
  8. data/contrib/zstd/COPYING +339 -0
  9. data/contrib/zstd/LICENSE +13 -9
  10. data/contrib/zstd/Makefile +414 -0
  11. data/contrib/zstd/README.md +170 -45
  12. data/contrib/zstd/TESTING.md +44 -0
  13. data/contrib/zstd/appveyor.yml +289 -0
  14. data/contrib/zstd/lib/BUCK +234 -0
  15. data/contrib/zstd/lib/Makefile +354 -0
  16. data/contrib/zstd/lib/README.md +179 -0
  17. data/contrib/zstd/{common → lib/common}/bitstream.h +170 -130
  18. data/contrib/zstd/lib/common/compiler.h +175 -0
  19. data/contrib/zstd/lib/common/cpu.h +215 -0
  20. data/contrib/zstd/lib/common/debug.c +24 -0
  21. data/contrib/zstd/lib/common/debug.h +114 -0
  22. data/contrib/zstd/{common → lib/common}/entropy_common.c +79 -94
  23. data/contrib/zstd/lib/common/error_private.c +55 -0
  24. data/contrib/zstd/lib/common/error_private.h +80 -0
  25. data/contrib/zstd/{common → lib/common}/fse.h +153 -93
  26. data/contrib/zstd/{common → lib/common}/fse_decompress.c +37 -82
  27. data/contrib/zstd/lib/common/huf.h +340 -0
  28. data/contrib/zstd/{common → lib/common}/mem.h +154 -78
  29. data/contrib/zstd/lib/common/pool.c +344 -0
  30. data/contrib/zstd/lib/common/pool.h +84 -0
  31. data/contrib/zstd/lib/common/threading.c +121 -0
  32. data/contrib/zstd/lib/common/threading.h +155 -0
  33. data/contrib/zstd/{common → lib/common}/xxhash.c +85 -75
  34. data/contrib/zstd/{common → lib/common}/xxhash.h +85 -73
  35. data/contrib/zstd/lib/common/zstd_common.c +83 -0
  36. data/contrib/zstd/lib/common/zstd_errors.h +94 -0
  37. data/contrib/zstd/lib/common/zstd_internal.h +447 -0
  38. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +194 -303
  39. data/contrib/zstd/lib/compress/hist.c +183 -0
  40. data/contrib/zstd/lib/compress/hist.h +75 -0
  41. data/contrib/zstd/lib/compress/huf_compress.c +798 -0
  42. data/contrib/zstd/lib/compress/zstd_compress.c +4278 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_internal.h +1125 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  45. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  46. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
  47. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  48. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
  49. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  50. data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
  51. data/contrib/zstd/lib/compress/zstd_double_fast.c +521 -0
  52. data/contrib/zstd/lib/compress/zstd_double_fast.h +38 -0
  53. data/contrib/zstd/lib/compress/zstd_fast.c +496 -0
  54. data/contrib/zstd/lib/compress/zstd_fast.h +37 -0
  55. data/contrib/zstd/lib/compress/zstd_lazy.c +1138 -0
  56. data/contrib/zstd/lib/compress/zstd_lazy.h +67 -0
  57. data/contrib/zstd/lib/compress/zstd_ldm.c +619 -0
  58. data/contrib/zstd/lib/compress/zstd_ldm.h +110 -0
  59. data/contrib/zstd/lib/compress/zstd_opt.c +1200 -0
  60. data/contrib/zstd/lib/compress/zstd_opt.h +56 -0
  61. data/contrib/zstd/lib/compress/zstdmt_compress.c +2143 -0
  62. data/contrib/zstd/lib/compress/zstdmt_compress.h +192 -0
  63. data/contrib/zstd/lib/decompress/huf_decompress.c +1248 -0
  64. data/contrib/zstd/lib/decompress/zstd_ddict.c +244 -0
  65. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  66. data/contrib/zstd/lib/decompress/zstd_decompress.c +1885 -0
  67. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1432 -0
  68. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  69. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +189 -0
  70. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +86 -69
  71. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  72. data/contrib/zstd/lib/deprecated/zbuff_compress.c +147 -0
  73. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  74. data/contrib/zstd/lib/dictBuilder/cover.c +1236 -0
  75. data/contrib/zstd/lib/dictBuilder/cover.h +157 -0
  76. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +3 -3
  77. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +5 -5
  78. data/contrib/zstd/lib/dictBuilder/fastcover.c +757 -0
  79. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +437 -347
  80. data/contrib/zstd/lib/dictBuilder/zdict.h +305 -0
  81. data/contrib/zstd/lib/legacy/zstd_legacy.h +415 -0
  82. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +272 -292
  83. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +26 -32
  84. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +162 -392
  85. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +26 -32
  86. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +162 -391
  87. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +27 -33
  88. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +195 -604
  89. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +26 -32
  90. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +300 -575
  91. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +22 -31
  92. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +165 -592
  93. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +54 -67
  94. data/contrib/zstd/lib/legacy/zstd_v07.c +4541 -0
  95. data/contrib/zstd/lib/legacy/zstd_v07.h +187 -0
  96. data/contrib/zstd/lib/libzstd.pc.in +15 -0
  97. data/contrib/zstd/lib/zstd.h +2090 -0
  98. data/ext/depend +2 -0
  99. data/ext/extconf.rb +18 -5
  100. data/ext/extzstd.c +296 -214
  101. data/ext/extzstd.h +81 -36
  102. data/ext/extzstd_nogvls.h +0 -117
  103. data/ext/extzstd_stream.c +622 -0
  104. data/ext/libzstd_conf.h +8 -0
  105. data/ext/zstd_common.c +11 -0
  106. data/ext/zstd_compress.c +15 -0
  107. data/ext/zstd_decompress.c +6 -0
  108. data/ext/zstd_dictbuilder.c +10 -0
  109. data/ext/zstd_dictbuilder_fastcover.c +3 -0
  110. data/ext/zstd_legacy_v01.c +3 -1
  111. data/ext/zstd_legacy_v02.c +3 -1
  112. data/ext/zstd_legacy_v03.c +3 -1
  113. data/ext/zstd_legacy_v04.c +3 -1
  114. data/ext/zstd_legacy_v05.c +3 -1
  115. data/ext/zstd_legacy_v06.c +3 -1
  116. data/ext/zstd_legacy_v07.c +3 -0
  117. data/gemstub.rb +27 -21
  118. data/lib/extzstd.rb +82 -161
  119. data/lib/extzstd/version.rb +1 -1
  120. data/test/test_basic.rb +19 -6
  121. metadata +127 -59
  122. data/contrib/zstd/common/error_private.h +0 -125
  123. data/contrib/zstd/common/error_public.h +0 -77
  124. data/contrib/zstd/common/huf.h +0 -228
  125. data/contrib/zstd/common/zstd.h +0 -475
  126. data/contrib/zstd/common/zstd_common.c +0 -91
  127. data/contrib/zstd/common/zstd_internal.h +0 -238
  128. data/contrib/zstd/compress/huf_compress.c +0 -577
  129. data/contrib/zstd/compress/zbuff_compress.c +0 -327
  130. data/contrib/zstd/compress/zstd_compress.c +0 -3074
  131. data/contrib/zstd/compress/zstd_opt.h +0 -1046
  132. data/contrib/zstd/decompress/huf_decompress.c +0 -894
  133. data/contrib/zstd/decompress/zbuff_decompress.c +0 -294
  134. data/contrib/zstd/decompress/zstd_decompress.c +0 -1362
  135. data/contrib/zstd/dictBuilder/zdict.h +0 -113
  136. data/contrib/zstd/legacy/zstd_legacy.h +0 -140
  137. data/ext/extzstd_buffered.c +0 -265
  138. data/ext/zstd_amalgam.c +0 -18
@@ -0,0 +1,5 @@
1
+ # Code of Conduct
2
+
3
+ Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
4
+ Please read the [full text](https://code.fb.com/codeofconduct/)
5
+ so that you can understand what actions will and will not be tolerated.
@@ -0,0 +1,392 @@
1
+ # Contributing to Zstandard
2
+ We want to make contributing to this project as easy and transparent as
3
+ possible.
4
+
5
+ ## Our Development Process
6
+ New versions are being developed in the "dev" branch,
7
+ or in their own feature branch.
8
+ When they are deemed ready for a release, they are merged into "master".
9
+
10
+ As a consequence, all contributions must stage first through "dev"
11
+ or their own feature branch.
12
+
13
+ ## Pull Requests
14
+ We actively welcome your pull requests.
15
+
16
+ 1. Fork the repo and create your branch from `dev`.
17
+ 2. If you've added code that should be tested, add tests.
18
+ 3. If you've changed APIs, update the documentation.
19
+ 4. Ensure the test suite passes.
20
+ 5. Make sure your code lints.
21
+ 6. If you haven't already, complete the Contributor License Agreement ("CLA").
22
+
23
+ ## Contributor License Agreement ("CLA")
24
+ In order to accept your pull request, we need you to submit a CLA. You only need
25
+ to do this once to work on any of Facebook's open source projects.
26
+
27
+ Complete your CLA here: <https://code.facebook.com/cla>
28
+
29
+ ## Workflow
30
+ Zstd uses a branch-based workflow for making changes to the codebase. Typically, zstd
31
+ will use a new branch per sizable topic. For smaller changes, it is okay to lump multiple
32
+ related changes into a branch.
33
+
34
+ Our contribution process works in three main stages:
35
+ 1. Local development
36
+ * Update:
37
+ * Checkout your fork of zstd if you have not already
38
+ ```
39
+ git clone https://github.com/<username>/zstd
40
+ cd zstd
41
+ ```
42
+ * Update your local dev branch
43
+ ```
44
+ git pull https://github.com/facebook/zstd dev
45
+ git push origin dev
46
+ ```
47
+ * Topic and development:
48
+ * Make a new branch on your fork about the topic you're developing for
49
+ ```
50
+ # branch names should be concise but sufficiently informative
51
+ git checkout -b <branch-name>
52
+ git push origin <branch-name>
53
+ ```
54
+ * Make commits and push
55
+ ```
56
+ # make some changes =
57
+ git add -u && git commit -m <message>
58
+ git push origin <branch-name>
59
+ ```
60
+ * Note: run local tests to ensure that your changes didn't break existing functionality
61
+ * Quick check
62
+ ```
63
+ make shortest
64
+ ```
65
+ * Longer check
66
+ ```
67
+ make test
68
+ ```
69
+ 2. Code Review and CI tests
70
+ * Ensure CI tests pass:
71
+ * Before sharing anything to the community, make sure that all CI tests pass on your local fork.
72
+ See our section on setting up your CI environment for more information on how to do this.
73
+ * Ensure that static analysis passes on your development machine. See the Static Analysis section
74
+ below to see how to do this.
75
+ * Create a pull request:
76
+ * When you are ready to share your changes with the community, create a pull request from your branch
77
+ to facebook:dev. You can do this very easily by clicking 'Create Pull Request' on your fork's home
78
+ page.
79
+ * From there, select the branch where you made changes as your source branch and facebook:dev
80
+ as the destination.
81
+ * Examine the diff presented between the two branches to make sure there is nothing unexpected.
82
+ * Write a good pull request description:
83
+ * While there is no strict template that our contributors follow, we would like them to
84
+ sufficiently summarize and motivate the changes they are proposing. We recommend all pull requests,
85
+ at least indirectly, address the following points.
86
+ * Is this pull request important and why?
87
+ * Is it addressing an issue? If so, what issue? (provide links for convenience please)
88
+ * Is this a new feature? If so, why is it useful and/or necessary?
89
+ * Are there background references and documents that reviewers should be aware of to properly assess this change?
90
+ * Note: make sure to point out any design and architectural decisions that you made and the rationale behind them.
91
+ * Note: if you have been working with a specific user and would like them to review your work, make sure you mention them using (@<username>)
92
+ * Submit the pull request and iterate with feedback.
93
+ 3. Merge and Release
94
+ * Getting approval:
95
+ * You will have to iterate on your changes with feedback from other collaborators to reach a point
96
+ where your pull request can be safely merged.
97
+ * To avoid too many comments on style and convention, make sure that you have a
98
+ look at our style section below before creating a pull request.
99
+ * Eventually, someone from the zstd team will approve your pull request and not long after merge it into
100
+ the dev branch.
101
+ * Housekeeping:
102
+ * Most PRs are linked with one or more Github issues. If this is the case for your PR, make sure
103
+ the corresponding issue is mentioned. If your change 'fixes' or completely addresses the
104
+ issue at hand, then please indicate this by requesting that an issue be closed by commenting.
105
+ * Just because your changes have been merged does not mean the topic or larger issue is complete. Remember
106
+ that the change must make it to an official zstd release for it to be meaningful. We recommend
107
+ that contributors track the activity on their pull request and corresponding issue(s) page(s) until
108
+ their change makes it to the next release of zstd. Users will often discover bugs in your code or
109
+ suggest ways to refine and improve your initial changes even after the pull request is merged.
110
+
111
+ ## Static Analysis
112
+ Static analysis is a process for examining the correctness or validity of a program without actually
113
+ executing it. It usually helps us find many simple bugs. Zstd uses clang's `scan-build` tool for
114
+ static analysis. You can install it by following the instructions for your OS on https://clang-analyzer.llvm.org/scan-build.
115
+
116
+ Once installed, you can ensure that our static analysis tests pass on your local development machine
117
+ by running:
118
+ ```
119
+ make staticAnalyze
120
+ ```
121
+
122
+ In general, you can use `scan-build` to static analyze any build script. For example, to static analyze
123
+ just `contrib/largeNbDicts` and nothing else, you can run:
124
+
125
+ ```
126
+ scan-build make -C contrib/largeNbDicts largeNbDicts
127
+ ```
128
+
129
+ ## Performance
130
+ Performance is extremely important for zstd and we only merge pull requests whose performance
131
+ landscape and corresponding trade-offs have been adequately analyzed, reproduced, and presented.
132
+ This high bar for performance means that every PR which has the potential to
133
+ impact performance takes a very long time for us to properly review. That being said, we
134
+ always welcome contributions to improve performance (or worsen performance for the trade-off of
135
+ something else). Please keep the following in mind before submitting a performance related PR:
136
+
137
+ 1. Zstd isn't as old as gzip but it has been around for some time now and its evolution is
138
+ very well documented via past Github issues and pull requests. It may be the case that your
139
+ particular performance optimization has already been considered in the past. Please take some
140
+ time to search through old issues and pull requests using keywords specific to your
141
+ would-be PR. Of course, just because a topic has already been discussed (and perhaps rejected
142
+ on some grounds) in the past, doesn't mean it isn't worth bringing up again. But even in that case,
143
+ it will be helpful for you to have context from that topic's history before contributing.
144
+ 2. The distinction between noise and actual performance gains can unfortunately be very subtle
145
+ especially when microbenchmarking extremely small wins or losses. The only remedy to getting
146
+ something subtle merged is extensive benchmarking. You will be doing us a great favor if you
147
+ take the time to run extensive, long-duration, and potentially cross-(os, platform, process, etc)
148
+ benchmarks on your end before submitting a PR. Of course, you will not be able to benchmark
149
+ your changes on every single processor and os out there (and neither will we) but do the best
150
+ you can :) We've added some things to think about when benchmarking below in the Benchmarking
151
+ Performance section which might be helpful for you.
152
+ 3. Optimizing performance for a certain OS, processor vendor, compiler, or network system is a perfectly
153
+ legitimate thing to do as long as it does not harm the overall performance health of Zstd.
154
+ This is a hard balance to strike but please keep in mind other aspects of Zstd when
155
+ submitting changes that are clang-specific, windows-specific, etc.
156
+
157
+ ## Benchmarking Performance
158
+ Performance microbenchmarking is a tricky subject but also essential for Zstd. We value empirical
159
+ testing over theoretical speculation. This guide is not perfect but for most scenarios, it
160
+ is a good place to start.
161
+
162
+ ### Stability
163
+ Unfortunately, the most important aspect in being able to benchmark reliably is to have a stable
164
+ benchmarking machine. A virtual machine, a machine with shared resources, or your laptop
165
+ will typically not be stable enough to obtain reliable benchmark results. If you can get your
166
+ hands on a desktop, this is usually a better scenario.
167
+
168
+ Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to
169
+ do a little more work to ensure that you are in fact measuring the changes you've made and not
170
+ noise. Here are some things you can do to make your benchmarks more stable:
171
+
172
+ 1. The most simple thing you can do to drastically improve the stability of your benchmark is
173
+ to run it multiple times and then aggregate the results of those runs. As a general rule of
174
+ thumb, the smaller the change you are trying to measure, the more samples of benchmark runs
175
+ you will have to aggregate over to get reliable results. Here are some additional things to keep in
176
+ mind when running multiple trials:
177
+ * How you aggregate your samples is important. You might be tempted to use the mean of your
178
+ results. While this is certainly going to be a more stable number than a raw single sample
179
+ benchmark number, you might have more luck by taking the median. The mean is not robust to
180
+ outliers whereas the median is. Better still, you could simply take the fastest speed your
181
+ benchmark achieved on each run since that is likely the fastest your process will be
182
+ capable of running your code. In our experience, this (aggregating by just taking the sample
183
+ with the fastest running time) has been the most stable approach.
184
+ * The more samples you have, the more stable your benchmarks should be. You can verify
185
+ your improved stability by looking at the size of your confidence intervals as you
186
+ increase your sample count. These should get smaller and smaller. Eventually hopefully
187
+ smaller than the performance win you are expecting.
188
+ * Most processors will take some time to get `hot` when running anything. The observations
189
+ you collect during that time period will be very different from the true performance number. Having
190
+ a very large number of samples will help alleviate this problem slightly but you can also
191
+ address it directly by simply not including the first `n` iterations of your benchmark in
192
+ your aggregations. You can determine `n` by simply looking at the results from each iteration
193
+ and then hand picking a good threshold after which the variance in results seems to stabilize.
194
+ 2. You cannot really get reliable benchmarks if your host machine is simultaneously running
195
+ another cpu/memory-intensive application in the background. If you are running benchmarks on your
196
+ personal laptop for instance, you should close all applications (including your code editor and
197
+ browser) before running your benchmarks. You might also have invisible background applications
198
+ running. You can see what these are by looking at either Activity Monitor on Mac or Task Manager
199
+ on Windows. You will get more stable benchmark results if you end those processes as well.
200
+ * If you have multiple cores, you can even run your benchmark on a reserved core to prevent
201
+ pollution from other OS and user processes. There are a number of ways to do this depending
202
+ on your OS:
203
+ * On linux boxes, you can use https://github.com/lpechacek/cpuset.
204
+ * On Windows, you can "Set Processor Affinity" using https://www.thewindowsclub.com/processor-affinity-windows
205
+ * On Mac, you can try to use their dedicated affinity API https://developer.apple.com/library/archive/releasenotes/Performance/RN-AffinityAPI/#//apple_ref/doc/uid/TP40006635-CH1-DontLinkElementID_2
206
+ 3. To benchmark, you will likely end up writing a separate c/c++ program that will link libzstd.
207
+ Dynamically linking your library will introduce some added variation (not a large amount but
208
+ definitely some). Statically linking libzstd will be more stable. Static libraries should
209
+ be enabled by default when building zstd.
210
+ 4. Use a profiler with a good high resolution timer. See the section below on profiling for
211
+ details on this.
212
+ 5. Disable frequency scaling, turbo boost and address space randomization (this will vary by OS)
213
+ 6. Try to avoid storage. On some systems you can use tmpfs. Putting the program, inputs and outputs on
214
+ tmpfs avoids touching a real storage system, which can have a pretty big variability.
215
+
216
+ Also check out LLVM's guide on benchmarking here: https://llvm.org/docs/Benchmarking.html
217
+
218
+ ### Zstd benchmark
219
+ The fastest signal you can get regarding your performance changes is via the built-in zstd cli
220
+ bench option. You can run Zstd as you typically would for your scenario using some set of options
221
+ and then additionally also specify the `-b#` option. Doing this will run our benchmarking pipeline
222
+ for the options you have just provided. If you want to look at the internals of how this
223
+ benchmarking script works, you can check out programs/benchzstd.c
224
+
225
+ For example: say you have made a change that you believe improves the speed of zstd level 1. The
226
+ very first thing you should use to assess whether you actually achieved any sort of improvement
227
+ is `zstd -b`. You might try to do something like this. Note: you can use the `-i` option to
228
+ specify a running time for your benchmark in seconds (default is 3 seconds).
229
+ Usually, the longer the running time, the more stable your results will be.
230
+
231
+ ```
232
+ $ git checkout <commit-before-your-change>
233
+ $ make && cp zstd zstd-old
234
+ $ git checkout <commit-after-your-change>
235
+ $ make && cp zstd zstd-new
236
+ $ zstd-old -i5 -b1 <your-test-data>
237
+ 1<your-test-data> : 8990 -> 3992 (2.252), 302.6 MB/s , 626.4 MB/s
238
+ $ zstd-new -i5 -b1 <your-test-data>
239
+ 1<your-test-data> : 8990 -> 3992 (2.252), 302.8 MB/s , 628.4 MB/s
240
+ ```
241
+
242
+ Unless your performance win is large enough to be visible despite the intrinsic noise
243
+ on your computer, benchzstd alone will likely not be enough to validate the impact of your
244
+ changes. For example, the results of the example above indicate that effectively nothing
245
+ changed but there could be a small <3% improvement that the noise on the host machine
246
+ obscured. So unless you see a large performance win (10-15% consistently) using just
247
+ this method of evaluation will not be sufficient.
248
+
249
+ ### Profiling
250
+ There are a number of great profilers out there. We're going to briefly mention how you can
251
+ profile your code using `instruments` on mac, `perf` on linux and `visual studio profiler`
252
+ on windows.
253
+
254
+ Say you have an idea for a change that you think will provide some good performance gains
255
+ for level 1 compression on Zstd. Typically this means, you have identified a section of
256
+ code that you think can be made to run faster.
257
+
258
+ The first thing you will want to do is make sure that the piece of code is actually taking up
259
+ a notable amount of time to run. It is usually not worth optimizing something which accounts for less than
260
+ 0.0001% of the total running time. Luckily, there are tools to help with this.
261
+ Profilers will let you see how much time your code spends inside a particular function.
262
+ If your target code snippet is only part of a function, it might be worth trying to
263
+ isolate that snippet by moving it to its own function (this is usually not necessary but
264
+ might be).
265
+
266
+ Most profilers (including the profilers discussed below) will generate a call graph of
267
+ functions for you. Your goal will be to find your function of interest in this call graph
268
+ and then inspect the time spent inside of it. You might also want to look at the
269
+ annotated assembly which most profilers will provide you with.
270
+
271
+ #### Instruments
272
+ We will once again consider the scenario where you think you've identified a piece of code
273
+ whose performance can be improved upon. Follow these steps to profile your code using
274
+ Instruments.
275
+
276
+ 1. Open Instruments
277
+ 2. Select `Time Profiler` from the list of standard templates
278
+ 3. Close all other applications except for your instruments window and your terminal
279
+ 4. Run your benchmarking script from your terminal window
280
+ * You will want a benchmark that runs for at least a few seconds (5 seconds will
281
+ usually be long enough). This way the profiler will have something to work with
282
+ and you will have ample time to attach your profiler to this process:)
283
+ * I will just use benchzstd as my benchmarking script for this example:
284
+ ```
285
+ $ zstd -b1 -i5 <my-data> # this will run for 5 seconds
286
+ ```
287
+ 5. Once you run your benchmarking script, switch back over to instruments and attach your
288
+ process to the time profiler. You can do this by:
289
+ * Clicking on the `All Processes` drop down in the top left of the toolbar.
290
+ * Selecting your process from the dropdown. In my case, it is just going to be labeled
291
+ `zstd`
292
+ * Hitting the bright red record circle button on the top left of the toolbar
293
+ 6. Your profiler will now start collecting metrics from your benchmarking script. Once
294
+ you think you have collected enough samples (usually this is the case after 3 seconds of
295
+ recording), stop your profiler.
296
+ 7. Make sure that in the toolbar of the bottom window, `profile` is selected.
297
+ 8. You should be able to see your call graph.
298
+ * If you don't see the call graph or an incomplete call graph, make sure you have compiled
299
+ zstd and your benchmarking script using debug flags. On mac and linux, this just means
300
+ you will have to supply the `-g` flag along with your build script. You might also
301
+ have to provide the `-fno-omit-frame-pointer` flag
302
+ 9. Dig down the graph to find your function call and then inspect it by double clicking
303
+ the list item. You will be able to see the annotated source code and the assembly side by
304
+ side.
305
+
306
+ #### Perf
307
+
308
+ This wiki has a pretty detailed tutorial on getting started working with perf so we'll
309
+ leave you to check that out if you're getting started:
310
+
311
+ https://perf.wiki.kernel.org/index.php/Tutorial
312
+
313
+ Some general notes on perf:
314
+ * Use `perf stat -r # <bench-program>` to quickly get some relevant timing and
315
+ counter statistics. Perf uses a high resolution timer and this is likely one
316
+ of the first things your team will run when assessing your PR.
317
+ * Perf has a long list of hardware counters that can be viewed with `perf list`.
318
+ When measuring optimizations, something worth trying is to make sure the hardware
319
+ counters you expect to be impacted by your change are in fact being so. For example,
320
+ if you expect the L1 cache misses to decrease with your change, you can look at the
321
+ counter `L1-dcache-load-misses`
322
+ * Perf hardware counters will not work on a virtual machine.
323
+
324
+ #### Visual Studio
325
+
326
+ TODO
327
+
328
+
329
+ ## Setting up continuous integration (CI) on your fork
330
+ Zstd uses a number of different continuous integration (CI) tools to ensure that new changes
331
+ are well tested before they make it to an official release. Specifically, we use the platforms
332
+ travis-ci, circle-ci, and appveyor.
333
+
334
+ Changes cannot be merged into the main dev branch unless they pass all of our CI tests.
335
+ The easiest way to run these CI tests on your own before submitting a PR to our dev branch
336
+ is to configure your personal fork of zstd with each of the CI platforms. Below, you'll find
337
+ instructions for doing this.
338
+
339
+ ### travis-ci
340
+ Follow these steps to link travis-ci with your github fork of zstd
341
+
342
+ 1. Make sure you are logged into your github account
343
+ 2. Go to https://travis-ci.org/
344
+ 3. Click 'Sign in with Github' on the top right
345
+ 4. Click 'Authorize travis-ci'
346
+ 5. Click 'Activate all repositories using Github Apps'
347
+ 6. Select 'Only select repositories' and select your fork of zstd from the drop down
348
+ 7. Click 'Approve and Install'
349
+ 8. Click 'Sign in with Github' again. This time, it will be for travis-pro (which will let you view your tests on the web dashboard)
350
+ 9. Click 'Authorize travis-pro'
351
+ 10. You should have travis set up on your fork now.
352
+
353
+ ### circle-ci
354
+ TODO
355
+
356
+ ### appveyor
357
+ Follow these steps to link appveyor with your github fork of zstd
358
+
359
+ 1. Make sure you are logged into your github account
360
+ 2. Go to https://www.appveyor.com/
361
+ 3. Click 'Sign in' on the top right
362
+ 4. Select 'Github' on the left panel
363
+ 5. Click 'Authorize appveyor'
364
+ 6. You might be asked to select which repositories you want to give appveyor permission to. Select your fork of zstd if you're prompted
365
+ 7. You should have appveyor set up on your fork now.
366
+
367
+ ### General notes on CI
368
+ CI tests run every time a pull request (PR) is created or updated. The exact tests
369
+ that get run will depend on the destination branch you specify. Some tests take
370
+ longer to run than others. Currently, our CI is set up to run a short
371
+ series of tests when creating a PR to the dev branch and a longer series of tests
372
+ when creating a PR to the master branch. You can look in the configuration files
373
+ of the respective CI platform for more information on what gets run when.
374
+
375
+ Most people will just want to create a PR with the destination set to their local dev
376
+ branch of zstd. You can then find the status of the tests on the PR's page. You can also
377
+ re-run tests and cancel running tests from the PR page or from the respective CI's dashboard.
378
+
379
+ ## Issues
380
+ We use GitHub issues to track public bugs. Please ensure your description is
381
+ clear and has sufficient instructions to be able to reproduce the issue.
382
+
383
+ Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
384
+ disclosure of security bugs. In those cases, please go through the process
385
+ outlined on that page and do not file a public issue.
386
+
387
+ ## Coding Style
388
+ * 4 spaces for indentation rather than tabs
389
+
390
+ ## License
391
+ By contributing to Zstandard, you agree that your contributions will be licensed
392
+ under both the [LICENSE](LICENSE) file and the [COPYING](COPYING) file in the root directory of this source tree.
@@ -0,0 +1,339 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Preamble
10
+
11
+ The licenses for most software are designed to take away your
12
+ freedom to share and change it. By contrast, the GNU General Public
13
+ License is intended to guarantee your freedom to share and change free
14
+ software--to make sure the software is free for all its users. This
15
+ General Public License applies to most of the Free Software
16
+ Foundation's software and to any other program whose authors commit to
17
+ using it. (Some other Free Software Foundation software is covered by
18
+ the GNU Lesser General Public License instead.) You can apply it to
19
+ your programs, too.
20
+
21
+ When we speak of free software, we are referring to freedom, not
22
+ price. Our General Public Licenses are designed to make sure that you
23
+ have the freedom to distribute copies of free software (and charge for
24
+ this service if you wish), that you receive source code or can get it
25
+ if you want it, that you can change the software or use pieces of it
26
+ in new free programs; and that you know you can do these things.
27
+
28
+ To protect your rights, we need to make restrictions that forbid
29
+ anyone to deny you these rights or to ask you to surrender the rights.
30
+ These restrictions translate to certain responsibilities for you if you
31
+ distribute copies of the software, or if you modify it.
32
+
33
+ For example, if you distribute copies of such a program, whether
34
+ gratis or for a fee, you must give the recipients all the rights that
35
+ you have. You must make sure that they, too, receive or can get the
36
+ source code. And you must show them these terms so they know their
37
+ rights.
38
+
39
+ We protect your rights with two steps: (1) copyright the software, and
40
+ (2) offer you this license which gives you legal permission to copy,
41
+ distribute and/or modify the software.
42
+
43
+ Also, for each author's protection and ours, we want to make certain
44
+ that everyone understands that there is no warranty for this free
45
+ software. If the software is modified by someone else and passed on, we
46
+ want its recipients to know that what they have is not the original, so
47
+ that any problems introduced by others will not reflect on the original
48
+ authors' reputations.
49
+
50
+ Finally, any free program is threatened constantly by software
51
+ patents. We wish to avoid the danger that redistributors of a free
52
+ program will individually obtain patent licenses, in effect making the
53
+ program proprietary. To prevent this, we have made it clear that any
54
+ patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ GNU GENERAL PUBLIC LICENSE
60
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+ 0. This License applies to any program or other work which contains
63
+ a notice placed by the copyright holder saying it may be distributed
64
+ under the terms of this General Public License. The "Program", below,
65
+ refers to any such program or work, and a "work based on the Program"
66
+ means either the Program or any derivative work under copyright law:
67
+ that is to say, a work containing the Program or a portion of it,
68
+ either verbatim or with modifications and/or translated into another
69
+ language. (Hereinafter, translation is included without limitation in
70
+ the term "modification".) Each licensee is addressed as "you".
71
+
72
+ Activities other than copying, distribution and modification are not
73
+ covered by this License; they are outside its scope. The act of
74
+ running the Program is not restricted, and the output from the Program
75
+ is covered only if its contents constitute a work based on the
76
+ Program (independent of having been made by running the Program).
77
+ Whether that is true depends on what the Program does.
78
+
79
+ 1. You may copy and distribute verbatim copies of the Program's
80
+ source code as you receive it, in any medium, provided that you
81
+ conspicuously and appropriately publish on each copy an appropriate
82
+ copyright notice and disclaimer of warranty; keep intact all the
83
+ notices that refer to this License and to the absence of any warranty;
84
+ and give any other recipients of the Program a copy of this License
85
+ along with the Program.
86
+
87
+ You may charge a fee for the physical act of transferring a copy, and
88
+ you may at your option offer warranty protection in exchange for a fee.
89
+
90
+ 2. You may modify your copy or copies of the Program or any portion
91
+ of it, thus forming a work based on the Program, and copy and
92
+ distribute such modifications or work under the terms of Section 1
93
+ above, provided that you also meet all of these conditions:
94
+
95
+ a) You must cause the modified files to carry prominent notices
96
+ stating that you changed the files and the date of any change.
97
+
98
+ b) You must cause any work that you distribute or publish, that in
99
+ whole or in part contains or is derived from the Program or any
100
+ part thereof, to be licensed as a whole at no charge to all third
101
+ parties under the terms of this License.
102
+
103
+ c) If the modified program normally reads commands interactively
104
+ when run, you must cause it, when started running for such
105
+ interactive use in the most ordinary way, to print or display an
106
+ announcement including an appropriate copyright notice and a
107
+ notice that there is no warranty (or else, saying that you provide
108
+ a warranty) and that users may redistribute the program under
109
+ these conditions, and telling the user how to view a copy of this
110
+ License. (Exception: if the Program itself is interactive but
111
+ does not normally print such an announcement, your work based on
112
+ the Program is not required to print an announcement.)
113
+
114
+ These requirements apply to the modified work as a whole. If
115
+ identifiable sections of that work are not derived from the Program,
116
+ and can be reasonably considered independent and separate works in
117
+ themselves, then this License, and its terms, do not apply to those
118
+ sections when you distribute them as separate works. But when you
119
+ distribute the same sections as part of a whole which is a work based
120
+ on the Program, the distribution of the whole must be on the terms of
121
+ this License, whose permissions for other licensees extend to the
122
+ entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+ Thus, it is not the intent of this section to claim rights or contest
125
+ your rights to work written entirely by you; rather, the intent is to
126
+ exercise the right to control the distribution of derivative or
127
+ collective works based on the Program.
128
+
129
+ In addition, mere aggregation of another work not based on the Program
130
+ with the Program (or with a work based on the Program) on a volume of
131
+ a storage or distribution medium does not bring the other work under
132
+ the scope of this License.
133
+
134
+ 3. You may copy and distribute the Program (or a work based on it,
135
+ under Section 2) in object code or executable form under the terms of
136
+ Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+ a) Accompany it with the complete corresponding machine-readable
139
+ source code, which must be distributed under the terms of Sections
140
+ 1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+ b) Accompany it with a written offer, valid for at least three
143
+ years, to give any third party, for a charge no more than your
144
+ cost of physically performing source distribution, a complete
145
+ machine-readable copy of the corresponding source code, to be
146
+ distributed under the terms of Sections 1 and 2 above on a medium
147
+ customarily used for software interchange; or,
148
+
149
+ c) Accompany it with the information you received as to the offer
150
+ to distribute corresponding source code. (This alternative is
151
+ allowed only for noncommercial distribution and only if you
152
+ received the program in object code or executable form with such
153
+ an offer, in accord with Subsection b above.)
154
+
155
+ The source code for a work means the preferred form of the work for
156
+ making modifications to it. For an executable work, complete source
157
+ code means all the source code for all modules it contains, plus any
158
+ associated interface definition files, plus the scripts used to
159
+ control compilation and installation of the executable. However, as a
160
+ special exception, the source code distributed need not include
161
+ anything that is normally distributed (in either source or binary
162
+ form) with the major components (compiler, kernel, and so on) of the
163
+ operating system on which the executable runs, unless that component
164
+ itself accompanies the executable.
165
+
166
+ If distribution of executable or object code is made by offering
167
+ access to copy from a designated place, then offering equivalent
168
+ access to copy the source code from the same place counts as
169
+ distribution of the source code, even though third parties are not
170
+ compelled to copy the source along with the object code.
171
+
172
+ 4. You may not copy, modify, sublicense, or distribute the Program
173
+ except as expressly provided under this License. Any attempt
174
+ otherwise to copy, modify, sublicense or distribute the Program is
175
+ void, and will automatically terminate your rights under this License.
176
+ However, parties who have received copies, or rights, from you under
177
+ this License will not have their licenses terminated so long as such
178
+ parties remain in full compliance.
179
+
180
+ 5. You are not required to accept this License, since you have not
181
+ signed it. However, nothing else grants you permission to modify or
182
+ distribute the Program or its derivative works. These actions are
183
+ prohibited by law if you do not accept this License. Therefore, by
184
+ modifying or distributing the Program (or any work based on the
185
+ Program), you indicate your acceptance of this License to do so, and
186
+ all its terms and conditions for copying, distributing or modifying
187
+ the Program or works based on it.
188
+
189
+ 6. Each time you redistribute the Program (or any work based on the
190
+ Program), the recipient automatically receives a license from the
191
+ original licensor to copy, distribute or modify the Program subject to
192
+ these terms and conditions. You may not impose any further
193
+ restrictions on the recipients' exercise of the rights granted herein.
194
+ You are not responsible for enforcing compliance by third parties to
195
+ this License.
196
+
197
+ 7. If, as a consequence of a court judgment or allegation of patent
198
+ infringement or for any other reason (not limited to patent issues),
199
+ conditions are imposed on you (whether by court order, agreement or
200
+ otherwise) that contradict the conditions of this License, they do not
201
+ excuse you from the conditions of this License. If you cannot
202
+ distribute so as to satisfy simultaneously your obligations under this
203
+ License and any other pertinent obligations, then as a consequence you
204
+ may not distribute the Program at all. For example, if a patent
205
+ license would not permit royalty-free redistribution of the Program by
206
+ all those who receive copies directly or indirectly through you, then
207
+ the only way you could satisfy both it and this License would be to
208
+ refrain entirely from distribution of the Program.
209
+
210
+ If any portion of this section is held invalid or unenforceable under
211
+ any particular circumstance, the balance of the section is intended to
212
+ apply and the section as a whole is intended to apply in other
213
+ circumstances.
214
+
215
+ It is not the purpose of this section to induce you to infringe any
216
+ patents or other property right claims or to contest validity of any
217
+ such claims; this section has the sole purpose of protecting the
218
+ integrity of the free software distribution system, which is
219
+ implemented by public license practices. Many people have made
220
+ generous contributions to the wide range of software distributed
221
+ through that system in reliance on consistent application of that
222
+ system; it is up to the author/donor to decide if he or she is willing
223
+ to distribute software through any other system and a licensee cannot
224
+ impose that choice.
225
+
226
+ This section is intended to make thoroughly clear what is believed to
227
+ be a consequence of the rest of this License.
228
+
229
+ 8. If the distribution and/or use of the Program is restricted in
230
+ certain countries either by patents or by copyrighted interfaces, the
231
+ original copyright holder who places the Program under this License
232
+ may add an explicit geographical distribution limitation excluding
233
+ those countries, so that distribution is permitted only in or among
234
+ countries not thus excluded. In such case, this License incorporates
235
+ the limitation as if written in the body of this License.
236
+
237
+ 9. The Free Software Foundation may publish revised and/or new versions
238
+ of the General Public License from time to time. Such new versions will
239
+ be similar in spirit to the present version, but may differ in detail to
240
+ address new problems or concerns.
241
+
242
+ Each version is given a distinguishing version number. If the Program
243
+ specifies a version number of this License which applies to it and "any
244
+ later version", you have the option of following the terms and conditions
245
+ either of that version or of any later version published by the Free
246
+ Software Foundation. If the Program does not specify a version number of
247
+ this License, you may choose any version ever published by the Free Software
248
+ Foundation.
249
+
250
+ 10. If you wish to incorporate parts of the Program into other free
251
+ programs whose distribution conditions are different, write to the author
252
+ to ask for permission. For software which is copyrighted by the Free
253
+ Software Foundation, write to the Free Software Foundation; we sometimes
254
+ make exceptions for this. Our decision will be guided by the two goals
255
+ of preserving the free status of all derivatives of our free software and
256
+ of promoting the sharing and reuse of software generally.
257
+
258
+ NO WARRANTY
259
+
260
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+ REPAIR OR CORRECTION.
269
+
270
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+ POSSIBILITY OF SUCH DAMAGES.
279
+
280
+ END OF TERMS AND CONDITIONS
281
+
282
+ How to Apply These Terms to Your New Programs
283
+
284
+ If you develop a new program, and you want it to be of the greatest
285
+ possible use to the public, the best way to achieve this is to make it
286
+ free software which everyone can redistribute and change under these terms.
287
+
288
+ To do so, attach the following notices to the program. It is safest
289
+ to attach them to the start of each source file to most effectively
290
+ convey the exclusion of warranty; and each file should have at least
291
+ the "copyright" line and a pointer to where the full notice is found.
292
+
293
+ <one line to give the program's name and a brief idea of what it does.>
294
+ Copyright (C) <year> <name of author>
295
+
296
+ This program is free software; you can redistribute it and/or modify
297
+ it under the terms of the GNU General Public License as published by
298
+ the Free Software Foundation; either version 2 of the License, or
299
+ (at your option) any later version.
300
+
301
+ This program is distributed in the hope that it will be useful,
302
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
303
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304
+ GNU General Public License for more details.
305
+
306
+ You should have received a copy of the GNU General Public License along
307
+ with this program; if not, write to the Free Software Foundation, Inc.,
308
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
+
310
+ Also add information on how to contact you by electronic and paper mail.
311
+
312
+ If the program is interactive, make it output a short notice like this
313
+ when it starts in an interactive mode:
314
+
315
+ Gnomovision version 69, Copyright (C) year name of author
316
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
+ This is free software, and you are welcome to redistribute it
318
+ under certain conditions; type `show c' for details.
319
+
320
+ The hypothetical commands `show w' and `show c' should show the appropriate
321
+ parts of the General Public License. Of course, the commands you use may
322
+ be called something other than `show w' and `show c'; they could even be
323
+ mouse-clicks or menu items--whatever suits your program.
324
+
325
+ You should also get your employer (if you work as a programmer) or your
326
+ school, if any, to sign a "copyright disclaimer" for the program, if
327
+ necessary. Here is a sample; alter the names:
328
+
329
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
+
332
+ <signature of Ty Coon>, 1 April 1989
333
+ Ty Coon, President of Vice
334
+
335
+ This General Public License does not permit incorporating your program into
336
+ proprietary programs. If your program is a subroutine library, you may
337
+ consider it more useful to permit linking proprietary applications with the
338
+ library. If this is what you want to do, use the GNU Lesser General
339
+ Public License instead of this License.