rj_schema 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +18 -0
  3. data/ext/rj_schema/extconf.rb +7 -0
  4. data/ext/rj_schema/rapidjson/CHANGELOG.md +158 -0
  5. data/ext/rj_schema/rapidjson/CMakeLists.txt +221 -0
  6. data/ext/rj_schema/rapidjson/CMakeModules/FindGTestSrc.cmake +30 -0
  7. data/ext/rj_schema/rapidjson/RapidJSON.pc.in +7 -0
  8. data/ext/rj_schema/rapidjson/RapidJSONConfig.cmake.in +15 -0
  9. data/ext/rj_schema/rapidjson/RapidJSONConfigVersion.cmake.in +10 -0
  10. data/ext/rj_schema/rapidjson/appveyor.yml +41 -0
  11. data/ext/rj_schema/rapidjson/bin/data/glossary.json +22 -0
  12. data/ext/rj_schema/rapidjson/bin/data/menu.json +27 -0
  13. data/ext/rj_schema/rapidjson/bin/data/readme.txt +1 -0
  14. data/ext/rj_schema/rapidjson/bin/data/sample.json +3315 -0
  15. data/ext/rj_schema/rapidjson/bin/data/webapp.json +88 -0
  16. data/ext/rj_schema/rapidjson/bin/data/widget.json +26 -0
  17. data/ext/rj_schema/rapidjson/bin/draft-04/schema +150 -0
  18. data/ext/rj_schema/rapidjson/bin/encodings/utf16be.json +0 -0
  19. data/ext/rj_schema/rapidjson/bin/encodings/utf16bebom.json +0 -0
  20. data/ext/rj_schema/rapidjson/bin/encodings/utf16le.json +0 -0
  21. data/ext/rj_schema/rapidjson/bin/encodings/utf16lebom.json +0 -0
  22. data/ext/rj_schema/rapidjson/bin/encodings/utf32be.json +0 -0
  23. data/ext/rj_schema/rapidjson/bin/encodings/utf32bebom.json +0 -0
  24. data/ext/rj_schema/rapidjson/bin/encodings/utf32le.json +0 -0
  25. data/ext/rj_schema/rapidjson/bin/encodings/utf32lebom.json +0 -0
  26. data/ext/rj_schema/rapidjson/bin/encodings/utf8.json +7 -0
  27. data/ext/rj_schema/rapidjson/bin/encodings/utf8bom.json +7 -0
  28. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail1.json +1 -0
  29. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail10.json +1 -0
  30. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail11.json +1 -0
  31. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail12.json +1 -0
  32. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail13.json +1 -0
  33. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail14.json +1 -0
  34. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail15.json +1 -0
  35. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail16.json +1 -0
  36. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail17.json +1 -0
  37. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail18.json +1 -0
  38. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail19.json +1 -0
  39. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail2.json +1 -0
  40. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail20.json +1 -0
  41. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail21.json +1 -0
  42. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail22.json +1 -0
  43. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail23.json +1 -0
  44. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail24.json +1 -0
  45. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail25.json +1 -0
  46. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail26.json +1 -0
  47. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail27.json +2 -0
  48. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail28.json +2 -0
  49. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail29.json +1 -0
  50. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail3.json +1 -0
  51. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail30.json +1 -0
  52. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail31.json +1 -0
  53. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail32.json +1 -0
  54. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail33.json +1 -0
  55. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail4.json +1 -0
  56. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail5.json +1 -0
  57. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail6.json +1 -0
  58. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail7.json +1 -0
  59. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail8.json +1 -0
  60. data/ext/rj_schema/rapidjson/bin/jsonchecker/fail9.json +1 -0
  61. data/ext/rj_schema/rapidjson/bin/jsonchecker/pass1.json +58 -0
  62. data/ext/rj_schema/rapidjson/bin/jsonchecker/pass2.json +1 -0
  63. data/ext/rj_schema/rapidjson/bin/jsonchecker/pass3.json +6 -0
  64. data/ext/rj_schema/rapidjson/bin/jsonchecker/readme.txt +3 -0
  65. data/ext/rj_schema/rapidjson/bin/jsonschema/LICENSE +19 -0
  66. data/ext/rj_schema/rapidjson/bin/jsonschema/README.md +148 -0
  67. data/ext/rj_schema/rapidjson/bin/jsonschema/bin/jsonschema_suite +283 -0
  68. data/ext/rj_schema/rapidjson/bin/jsonschema/remotes/folder/folderInteger.json +3 -0
  69. data/ext/rj_schema/rapidjson/bin/jsonschema/remotes/integer.json +3 -0
  70. data/ext/rj_schema/rapidjson/bin/jsonschema/remotes/subSchemas.json +8 -0
  71. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/additionalItems.json +82 -0
  72. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/additionalProperties.json +88 -0
  73. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/default.json +49 -0
  74. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/dependencies.json +108 -0
  75. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/disallow.json +80 -0
  76. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/divisibleBy.json +60 -0
  77. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/enum.json +71 -0
  78. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/extends.json +94 -0
  79. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/items.json +46 -0
  80. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/maxItems.json +28 -0
  81. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/maxLength.json +33 -0
  82. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/maximum.json +42 -0
  83. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/minItems.json +28 -0
  84. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/minLength.json +33 -0
  85. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/minimum.json +42 -0
  86. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/optional/bignum.json +107 -0
  87. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/optional/format.json +222 -0
  88. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/optional/jsregex.json +18 -0
  89. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/optional/zeroTerminatedFloats.json +15 -0
  90. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/pattern.json +34 -0
  91. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/patternProperties.json +110 -0
  92. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/properties.json +92 -0
  93. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/ref.json +159 -0
  94. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/refRemote.json +74 -0
  95. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/required.json +53 -0
  96. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/type.json +474 -0
  97. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft3/uniqueItems.json +79 -0
  98. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/additionalItems.json +82 -0
  99. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/additionalProperties.json +88 -0
  100. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/allOf.json +112 -0
  101. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/anyOf.json +68 -0
  102. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/default.json +49 -0
  103. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/definitions.json +32 -0
  104. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/dependencies.json +113 -0
  105. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/enum.json +72 -0
  106. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/items.json +46 -0
  107. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/maxItems.json +28 -0
  108. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/maxLength.json +33 -0
  109. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/maxProperties.json +28 -0
  110. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/maximum.json +42 -0
  111. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/minItems.json +28 -0
  112. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/minLength.json +33 -0
  113. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/minProperties.json +28 -0
  114. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/minimum.json +42 -0
  115. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/multipleOf.json +60 -0
  116. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/not.json +96 -0
  117. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/oneOf.json +68 -0
  118. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/optional/bignum.json +107 -0
  119. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/optional/format.json +148 -0
  120. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/optional/zeroTerminatedFloats.json +15 -0
  121. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/pattern.json +34 -0
  122. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/patternProperties.json +110 -0
  123. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/properties.json +92 -0
  124. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/ref.json +159 -0
  125. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/refRemote.json +74 -0
  126. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/required.json +39 -0
  127. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/type.json +330 -0
  128. data/ext/rj_schema/rapidjson/bin/jsonschema/tests/draft4/uniqueItems.json +79 -0
  129. data/ext/rj_schema/rapidjson/bin/jsonschema/tox.ini +8 -0
  130. data/ext/rj_schema/rapidjson/bin/types/booleans.json +102 -0
  131. data/ext/rj_schema/rapidjson/bin/types/floats.json +102 -0
  132. data/ext/rj_schema/rapidjson/bin/types/guids.json +102 -0
  133. data/ext/rj_schema/rapidjson/bin/types/integers.json +102 -0
  134. data/ext/rj_schema/rapidjson/bin/types/mixed.json +592 -0
  135. data/ext/rj_schema/rapidjson/bin/types/nulls.json +102 -0
  136. data/ext/rj_schema/rapidjson/bin/types/paragraphs.json +102 -0
  137. data/ext/rj_schema/rapidjson/bin/types/readme.txt +1 -0
  138. data/ext/rj_schema/rapidjson/contrib/natvis/LICENSE +45 -0
  139. data/ext/rj_schema/rapidjson/contrib/natvis/README.md +7 -0
  140. data/ext/rj_schema/rapidjson/contrib/natvis/rapidjson.natvis +38 -0
  141. data/ext/rj_schema/rapidjson/doc/CMakeLists.txt +27 -0
  142. data/ext/rj_schema/rapidjson/doc/Doxyfile.in +2369 -0
  143. data/ext/rj_schema/rapidjson/doc/Doxyfile.zh-cn.in +2369 -0
  144. data/ext/rj_schema/rapidjson/doc/diagram/architecture.dot +50 -0
  145. data/ext/rj_schema/rapidjson/doc/diagram/architecture.png +0 -0
  146. data/ext/rj_schema/rapidjson/doc/diagram/insituparsing.dot +65 -0
  147. data/ext/rj_schema/rapidjson/doc/diagram/insituparsing.png +0 -0
  148. data/ext/rj_schema/rapidjson/doc/diagram/iterative-parser-states-diagram.dot +62 -0
  149. data/ext/rj_schema/rapidjson/doc/diagram/iterative-parser-states-diagram.png +0 -0
  150. data/ext/rj_schema/rapidjson/doc/diagram/makefile +8 -0
  151. data/ext/rj_schema/rapidjson/doc/diagram/move1.dot +47 -0
  152. data/ext/rj_schema/rapidjson/doc/diagram/move1.png +0 -0
  153. data/ext/rj_schema/rapidjson/doc/diagram/move2.dot +62 -0
  154. data/ext/rj_schema/rapidjson/doc/diagram/move2.png +0 -0
  155. data/ext/rj_schema/rapidjson/doc/diagram/move3.dot +60 -0
  156. data/ext/rj_schema/rapidjson/doc/diagram/move3.png +0 -0
  157. data/ext/rj_schema/rapidjson/doc/diagram/normalparsing.dot +56 -0
  158. data/ext/rj_schema/rapidjson/doc/diagram/normalparsing.png +0 -0
  159. data/ext/rj_schema/rapidjson/doc/diagram/simpledom.dot +54 -0
  160. data/ext/rj_schema/rapidjson/doc/diagram/simpledom.png +0 -0
  161. data/ext/rj_schema/rapidjson/doc/diagram/tutorial.dot +58 -0
  162. data/ext/rj_schema/rapidjson/doc/diagram/tutorial.png +0 -0
  163. data/ext/rj_schema/rapidjson/doc/diagram/utilityclass.dot +73 -0
  164. data/ext/rj_schema/rapidjson/doc/diagram/utilityclass.png +0 -0
  165. data/ext/rj_schema/rapidjson/doc/dom.md +280 -0
  166. data/ext/rj_schema/rapidjson/doc/dom.zh-cn.md +284 -0
  167. data/ext/rj_schema/rapidjson/doc/encoding.md +146 -0
  168. data/ext/rj_schema/rapidjson/doc/encoding.zh-cn.md +152 -0
  169. data/ext/rj_schema/rapidjson/doc/faq.md +289 -0
  170. data/ext/rj_schema/rapidjson/doc/faq.zh-cn.md +290 -0
  171. data/ext/rj_schema/rapidjson/doc/features.md +104 -0
  172. data/ext/rj_schema/rapidjson/doc/features.zh-cn.md +103 -0
  173. data/ext/rj_schema/rapidjson/doc/internals.md +368 -0
  174. data/ext/rj_schema/rapidjson/doc/internals.zh-cn.md +363 -0
  175. data/ext/rj_schema/rapidjson/doc/logo/rapidjson.png +0 -0
  176. data/ext/rj_schema/rapidjson/doc/logo/rapidjson.svg +119 -0
  177. data/ext/rj_schema/rapidjson/doc/misc/DoxygenLayout.xml +194 -0
  178. data/ext/rj_schema/rapidjson/doc/misc/doxygenextra.css +274 -0
  179. data/ext/rj_schema/rapidjson/doc/misc/footer.html +11 -0
  180. data/ext/rj_schema/rapidjson/doc/misc/header.html +24 -0
  181. data/ext/rj_schema/rapidjson/doc/npm.md +31 -0
  182. data/ext/rj_schema/rapidjson/doc/performance.md +26 -0
  183. data/ext/rj_schema/rapidjson/doc/performance.zh-cn.md +26 -0
  184. data/ext/rj_schema/rapidjson/doc/pointer.md +234 -0
  185. data/ext/rj_schema/rapidjson/doc/pointer.zh-cn.md +234 -0
  186. data/ext/rj_schema/rapidjson/doc/sax.md +509 -0
  187. data/ext/rj_schema/rapidjson/doc/sax.zh-cn.md +487 -0
  188. data/ext/rj_schema/rapidjson/doc/schema.md +505 -0
  189. data/ext/rj_schema/rapidjson/doc/schema.zh-cn.md +237 -0
  190. data/ext/rj_schema/rapidjson/doc/stream.md +426 -0
  191. data/ext/rj_schema/rapidjson/doc/stream.zh-cn.md +426 -0
  192. data/ext/rj_schema/rapidjson/doc/tutorial.md +536 -0
  193. data/ext/rj_schema/rapidjson/doc/tutorial.zh-cn.md +534 -0
  194. data/ext/rj_schema/rapidjson/docker/debian/Dockerfile +8 -0
  195. data/ext/rj_schema/rapidjson/example/CMakeLists.txt +45 -0
  196. data/ext/rj_schema/rapidjson/example/archiver/archiver.cpp +292 -0
  197. data/ext/rj_schema/rapidjson/example/archiver/archiver.h +145 -0
  198. data/ext/rj_schema/rapidjson/example/archiver/archivertest.cpp +287 -0
  199. data/ext/rj_schema/rapidjson/example/capitalize/capitalize.cpp +67 -0
  200. data/ext/rj_schema/rapidjson/example/condense/condense.cpp +32 -0
  201. data/ext/rj_schema/rapidjson/example/filterkey/filterkey.cpp +135 -0
  202. data/ext/rj_schema/rapidjson/example/filterkeydom/filterkeydom.cpp +170 -0
  203. data/ext/rj_schema/rapidjson/example/jsonx/jsonx.cpp +207 -0
  204. data/ext/rj_schema/rapidjson/example/lookaheadparser/lookaheadparser.cpp +350 -0
  205. data/ext/rj_schema/rapidjson/example/messagereader/messagereader.cpp +105 -0
  206. data/ext/rj_schema/rapidjson/example/parsebyparts/parsebyparts.cpp +176 -0
  207. data/ext/rj_schema/rapidjson/example/pretty/pretty.cpp +30 -0
  208. data/ext/rj_schema/rapidjson/example/prettyauto/prettyauto.cpp +56 -0
  209. data/ext/rj_schema/rapidjson/example/schemavalidator/schemavalidator.cpp +78 -0
  210. data/ext/rj_schema/rapidjson/example/serialize/serialize.cpp +173 -0
  211. data/ext/rj_schema/rapidjson/example/simpledom/simpledom.cpp +29 -0
  212. data/ext/rj_schema/rapidjson/example/simplepullreader/simplepullreader.cpp +53 -0
  213. data/ext/rj_schema/rapidjson/example/simplereader/simplereader.cpp +42 -0
  214. data/ext/rj_schema/rapidjson/example/simplewriter/simplewriter.cpp +36 -0
  215. data/ext/rj_schema/rapidjson/example/tutorial/tutorial.cpp +151 -0
  216. data/ext/rj_schema/rapidjson/include/rapidjson/allocators.h +271 -0
  217. data/ext/rj_schema/rapidjson/include/rapidjson/cursorstreamwrapper.h +78 -0
  218. data/ext/rj_schema/rapidjson/include/rapidjson/document.h +2630 -0
  219. data/ext/rj_schema/rapidjson/include/rapidjson/encodedstream.h +299 -0
  220. data/ext/rj_schema/rapidjson/include/rapidjson/encodings.h +716 -0
  221. data/ext/rj_schema/rapidjson/include/rapidjson/error/en.h +74 -0
  222. data/ext/rj_schema/rapidjson/include/rapidjson/error/error.h +161 -0
  223. data/ext/rj_schema/rapidjson/include/rapidjson/filereadstream.h +99 -0
  224. data/ext/rj_schema/rapidjson/include/rapidjson/filewritestream.h +104 -0
  225. data/ext/rj_schema/rapidjson/include/rapidjson/fwd.h +151 -0
  226. data/ext/rj_schema/rapidjson/include/rapidjson/internal/biginteger.h +290 -0
  227. data/ext/rj_schema/rapidjson/include/rapidjson/internal/diyfp.h +258 -0
  228. data/ext/rj_schema/rapidjson/include/rapidjson/internal/dtoa.h +245 -0
  229. data/ext/rj_schema/rapidjson/include/rapidjson/internal/ieee754.h +78 -0
  230. data/ext/rj_schema/rapidjson/include/rapidjson/internal/itoa.h +304 -0
  231. data/ext/rj_schema/rapidjson/include/rapidjson/internal/meta.h +181 -0
  232. data/ext/rj_schema/rapidjson/include/rapidjson/internal/pow10.h +55 -0
  233. data/ext/rj_schema/rapidjson/include/rapidjson/internal/regex.h +734 -0
  234. data/ext/rj_schema/rapidjson/include/rapidjson/internal/stack.h +231 -0
  235. data/ext/rj_schema/rapidjson/include/rapidjson/internal/strfunc.h +69 -0
  236. data/ext/rj_schema/rapidjson/include/rapidjson/internal/strtod.h +269 -0
  237. data/ext/rj_schema/rapidjson/include/rapidjson/internal/swap.h +46 -0
  238. data/ext/rj_schema/rapidjson/include/rapidjson/istreamwrapper.h +115 -0
  239. data/ext/rj_schema/rapidjson/include/rapidjson/memorybuffer.h +70 -0
  240. data/ext/rj_schema/rapidjson/include/rapidjson/memorystream.h +71 -0
  241. data/ext/rj_schema/rapidjson/include/rapidjson/msinttypes/inttypes.h +316 -0
  242. data/ext/rj_schema/rapidjson/include/rapidjson/msinttypes/stdint.h +300 -0
  243. data/ext/rj_schema/rapidjson/include/rapidjson/ostreamwrapper.h +81 -0
  244. data/ext/rj_schema/rapidjson/include/rapidjson/pointer.h +1363 -0
  245. data/ext/rj_schema/rapidjson/include/rapidjson/prettywriter.h +277 -0
  246. data/ext/rj_schema/rapidjson/include/rapidjson/rapidjson.h +628 -0
  247. data/ext/rj_schema/rapidjson/include/rapidjson/reader.h +2222 -0
  248. data/ext/rj_schema/rapidjson/include/rapidjson/schema.h +2479 -0
  249. data/ext/rj_schema/rapidjson/include/rapidjson/stream.h +223 -0
  250. data/ext/rj_schema/rapidjson/include/rapidjson/stringbuffer.h +121 -0
  251. data/ext/rj_schema/rapidjson/include/rapidjson/writer.h +716 -0
  252. data/ext/rj_schema/rapidjson/include_dirs.js +2 -0
  253. data/ext/rj_schema/rapidjson/library.json +15 -0
  254. data/ext/rj_schema/rapidjson/license.txt +57 -0
  255. data/ext/rj_schema/rapidjson/package.json +24 -0
  256. data/ext/rj_schema/rapidjson/rapidjson.autopkg +77 -0
  257. data/ext/rj_schema/rapidjson/readme.md +160 -0
  258. data/ext/rj_schema/rapidjson/readme.zh-cn.md +152 -0
  259. data/ext/rj_schema/rapidjson/test/CMakeLists.txt +20 -0
  260. data/ext/rj_schema/rapidjson/test/perftest/CMakeLists.txt +28 -0
  261. data/ext/rj_schema/rapidjson/test/perftest/misctest.cpp +974 -0
  262. data/ext/rj_schema/rapidjson/test/perftest/perftest.cpp +24 -0
  263. data/ext/rj_schema/rapidjson/test/perftest/perftest.h +185 -0
  264. data/ext/rj_schema/rapidjson/test/perftest/platformtest.cpp +166 -0
  265. data/ext/rj_schema/rapidjson/test/perftest/rapidjsontest.cpp +472 -0
  266. data/ext/rj_schema/rapidjson/test/perftest/schematest.cpp +216 -0
  267. data/ext/rj_schema/rapidjson/test/unittest/CMakeLists.txt +92 -0
  268. data/ext/rj_schema/rapidjson/test/unittest/allocatorstest.cpp +102 -0
  269. data/ext/rj_schema/rapidjson/test/unittest/bigintegertest.cpp +133 -0
  270. data/ext/rj_schema/rapidjson/test/unittest/cursorstreamwrappertest.cpp +115 -0
  271. data/ext/rj_schema/rapidjson/test/unittest/documenttest.cpp +672 -0
  272. data/ext/rj_schema/rapidjson/test/unittest/dtoatest.cpp +98 -0
  273. data/ext/rj_schema/rapidjson/test/unittest/encodedstreamtest.cpp +313 -0
  274. data/ext/rj_schema/rapidjson/test/unittest/encodingstest.cpp +451 -0
  275. data/ext/rj_schema/rapidjson/test/unittest/filestreamtest.cpp +112 -0
  276. data/ext/rj_schema/rapidjson/test/unittest/fwdtest.cpp +230 -0
  277. data/ext/rj_schema/rapidjson/test/unittest/istreamwrappertest.cpp +181 -0
  278. data/ext/rj_schema/rapidjson/test/unittest/itoatest.cpp +160 -0
  279. data/ext/rj_schema/rapidjson/test/unittest/jsoncheckertest.cpp +143 -0
  280. data/ext/rj_schema/rapidjson/test/unittest/namespacetest.cpp +70 -0
  281. data/ext/rj_schema/rapidjson/test/unittest/ostreamwrappertest.cpp +92 -0
  282. data/ext/rj_schema/rapidjson/test/unittest/pointertest.cpp +1529 -0
  283. data/ext/rj_schema/rapidjson/test/unittest/prettywritertest.cpp +344 -0
  284. data/ext/rj_schema/rapidjson/test/unittest/readertest.cpp +1895 -0
  285. data/ext/rj_schema/rapidjson/test/unittest/regextest.cpp +638 -0
  286. data/ext/rj_schema/rapidjson/test/unittest/schematest.cpp +2009 -0
  287. data/ext/rj_schema/rapidjson/test/unittest/simdtest.cpp +219 -0
  288. data/ext/rj_schema/rapidjson/test/unittest/strfunctest.cpp +30 -0
  289. data/ext/rj_schema/rapidjson/test/unittest/stringbuffertest.cpp +192 -0
  290. data/ext/rj_schema/rapidjson/test/unittest/strtodtest.cpp +132 -0
  291. data/ext/rj_schema/rapidjson/test/unittest/unittest.cpp +51 -0
  292. data/ext/rj_schema/rapidjson/test/unittest/unittest.h +140 -0
  293. data/ext/rj_schema/rapidjson/test/unittest/valuetest.cpp +1829 -0
  294. data/ext/rj_schema/rapidjson/test/unittest/writertest.cpp +598 -0
  295. data/ext/rj_schema/rapidjson/test/valgrind.supp +17 -0
  296. data/ext/rj_schema/rapidjson/travis-doxygen.sh +121 -0
  297. data/ext/rj_schema/rj_schema.cpp +136 -0
  298. data/lib/rj_schema.rb +7 -0
  299. metadata +371 -0
@@ -0,0 +1,368 @@
1
+ # Internals
2
+
3
+ This section records some design and implementation details.
4
+
5
+ [TOC]
6
+
7
+ # Architecture {#Architecture}
8
+
9
+ ## SAX and DOM
10
+
11
+ The basic relationship between SAX and DOM is shown in the following UML diagram.
12
+
13
+ ![Architecture UML class diagram](diagram/architecture.png)
14
+
15
+ The core of the relationship is the `Handler` concept. From the SAX side, `Reader` parses JSON from a stream and publishes events to a `Handler`. `Writer` implements the `Handler` concept to handle the same set of events. From the DOM side, `Document` implements the `Handler` concept to build a DOM according to the events. `Value` supports a `Value::Accept(Handler&)` function, which traverses the DOM to publish events.
16
+
17
+ With this design, SAX is not dependent on DOM. Even `Reader` and `Writer` have no dependencies between them. This provides the flexibility to chain event publishers and handlers. Besides, `Value` does not depend on SAX either. So, in addition to stringifying a DOM to JSON, the user may also stringify it to an XML writer, or do anything else.
18
+
19
+ ## Utility Classes
20
+
21
+ Both SAX and DOM APIs depend on 3 additional concepts: `Allocator`, `Encoding` and `Stream`. Their inheritance hierarchy is shown below.
22
+
23
+ ![Utility classes UML class diagram](diagram/utilityclass.png)
24
+
25
+ # Value {#Value}
26
+
27
+ `Value` (actually a typedef of `GenericValue<UTF8<>>`) is the core of DOM API. This section describes the design of it.
28
+
29
+ ## Data Layout {#DataLayout}
30
+
31
+ `Value` is a [variant type](http://en.wikipedia.org/wiki/Variant_type). In RapidJSON's context, an instance of `Value` can contain 1 of 6 JSON value types. This is possible by using a `union`. Each `Value` contains two members: `union Data data_` and an `unsigned flags_`. The `flags_` member indicates the JSON type, and also additional information.
32
+
33
+ The following tables show the data layout of each type. The 32-bit/64-bit columns indicate the size of the field in bytes.
34
+
35
+ | Null | |32-bit|64-bit|
36
+ |-------------------|----------------------------------|:----:|:----:|
37
+ | (unused) | |4 |8 |
38
+ | (unused) | |4 |4 |
39
+ | (unused) | |4 |4 |
40
+ | `unsigned flags_` | `kNullType kNullFlag` |4 |4 |
41
+
42
+ | Bool | |32-bit|64-bit|
43
+ |-------------------|----------------------------------------------------|:----:|:----:|
44
+ | (unused) | |4 |8 |
45
+ | (unused) | |4 |4 |
46
+ | (unused) | |4 |4 |
47
+ | `unsigned flags_` | `kBoolType` (either `kTrueFlag` or `kFalseFlag`) |4 |4 |
48
+
49
+ | String | |32-bit|64-bit|
50
+ |---------------------|-------------------------------------|:----:|:----:|
51
+ | `Ch* str` | Pointer to the string (may own) |4 |8 |
52
+ | `SizeType length` | Length of string |4 |4 |
53
+ | (unused) | |4 |4 |
54
+ | `unsigned flags_` | `kStringType kStringFlag ...` |4 |4 |
55
+
56
+ | Object | |32-bit|64-bit|
57
+ |---------------------|-------------------------------------|:----:|:----:|
58
+ | `Member* members` | Pointer to array of members (owned) |4 |8 |
59
+ | `SizeType size` | Number of members |4 |4 |
60
+ | `SizeType capacity` | Capacity of members |4 |4 |
61
+ | `unsigned flags_` | `kObjectType kObjectFlag` |4 |4 |
62
+
63
+ | Array | |32-bit|64-bit|
64
+ |---------------------|-------------------------------------|:----:|:----:|
65
+ | `Value* values` | Pointer to array of values (owned) |4 |8 |
66
+ | `SizeType size` | Number of values |4 |4 |
67
+ | `SizeType capacity` | Capacity of values |4 |4 |
68
+ | `unsigned flags_` | `kArrayType kArrayFlag` |4 |4 |
69
+
70
+ | Number (Int) | |32-bit|64-bit|
71
+ |---------------------|-------------------------------------|:----:|:----:|
72
+ | `int i` | 32-bit signed integer |4 |4 |
73
+ | (zero padding) | 0 |4 |4 |
74
+ | (unused) | |4 |8 |
75
+ | `unsigned flags_` | `kNumberType kNumberFlag kIntFlag kInt64Flag ...` |4 |4 |
76
+
77
+ | Number (UInt) | |32-bit|64-bit|
78
+ |---------------------|-------------------------------------|:----:|:----:|
79
+ | `unsigned u` | 32-bit unsigned integer |4 |4 |
80
+ | (zero padding) | 0 |4 |4 |
81
+ | (unused) | |4 |8 |
82
+ | `unsigned flags_` | `kNumberType kNumberFlag kUintFlag kUint64Flag ...` |4 |4 |
83
+
84
+ | Number (Int64) | |32-bit|64-bit|
85
+ |---------------------|-------------------------------------|:----:|:----:|
86
+ | `int64_t i64` | 64-bit signed integer |8 |8 |
87
+ | (unused) | |4 |8 |
88
+ | `unsigned flags_` | `kNumberType kNumberFlag kInt64Flag ...` |4 |4 |
89
+
90
+ | Number (Uint64) | |32-bit|64-bit|
91
+ |---------------------|-------------------------------------|:----:|:----:|
92
+ | `uint64_t u64` | 64-bit unsigned integer |8 |8 |
93
+ | (unused) | |4 |8 |
94
+ | `unsigned flags_` | `kNumberType kNumberFlag kUint64Flag ...` |4 |4 |
95
+
96
+ | Number (Double) | |32-bit|64-bit|
97
+ |---------------------|-------------------------------------|:----:|:----:|
98
+ | `double d` | Double precision floating-point |8 |8 |
99
+ | (unused) | |4 |8 |
100
+ | `unsigned flags_` | `kNumberType kNumberFlag kDoubleFlag` |4 |4 |
101
+
102
+ Here are some notes:
103
+ * To reduce memory consumption on 64-bit architectures, `SizeType` is a typedef of `unsigned` instead of `size_t`.
104
+ * Zero padding for a 32-bit number may be placed after or before the actual value, according to the endianness. This makes it possible to interpret a 32-bit integer as a 64-bit integer without any conversion.
105
+ * An `Int` is always an `Int64`, but the converse is not always true.
106
+
107
+ ## Flags {#Flags}
108
+
109
+ The 32-bit `flags_` contains both JSON type and other additional information. As shown in the above tables, each JSON type contains redundant `kXXXType` and `kXXXFlag`. This design is for optimizing the operation of testing bit-flags (`IsNumber()`) and obtaining a sequential number for each type (`GetType()`).
110
+
111
+ String has two optional flags. `kCopyFlag` means that the string owns a copy of the string. `kInlineStrFlag` means using [Short-String Optimization](#ShortString).
112
+
113
+ Number is a bit more complicated. For normal integer values, it can contain `kIntFlag`, `kUintFlag`, `kInt64Flag` and/or `kUint64Flag`, according to the range of the integer. Numbers with a fraction, and integers outside the 64-bit range, are stored as `double` with `kDoubleFlag`.
114
+
115
+ ## Short-String Optimization {#ShortString}
116
+
117
+ [Kosta](https://github.com/Kosta-Github) provided a very neat short-string optimization. The idea of the optimization is as follows. Excluding the `flags_`, a `Value` has 12 or 16 bytes (32-bit or 64-bit) for storing actual data. Instead of storing a pointer to a string, it is possible to store short strings in this space internally. For encodings with a 1-byte character type (e.g. `char`), it can store strings of at most 11 or 15 characters inside the `Value` type.
118
+
119
+ | ShortString (Ch=char) | |32-bit|64-bit|
120
+ |---------------------|-------------------------------------|:----:|:----:|
121
+ | `Ch str[MaxChars]` | String buffer |11 |15 |
122
+ | `Ch invLength` | MaxChars - Length |1 |1 |
123
+ | `unsigned flags_` | `kStringType kStringFlag ...` |4 |4 |
124
+
125
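+ A special technique is applied. Instead of storing the length of the string directly, it stores (MaxChars - length). This makes it possible to store 11 characters with a trailing `\0`: when the string has the maximum length, `invLength` is 0 and therefore doubles as the terminator.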
126
+
127
+ This optimization can reduce memory usage for copy-strings. It can also improve cache-coherence and thus improve runtime performance.
128
+
129
+ # Allocator {#InternalAllocator}
130
+
131
+ `Allocator` is a concept in RapidJSON:
132
+ ~~~cpp
133
+ concept Allocator {
134
+ static const bool kNeedFree; //!< Whether this allocator needs to call Free().
135
+
136
+ // Allocate a memory block.
137
+ // \param size of the memory block in bytes.
138
+ // \returns pointer to the memory block.
139
+ void* Malloc(size_t size);
140
+
141
+ // Resize a memory block.
142
+ // \param originalPtr The pointer to current memory block. Null pointer is permitted.
143
+ // \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.)
144
+ // \param newSize the new size in bytes.
145
+ void* Realloc(void* originalPtr, size_t originalSize, size_t newSize);
146
+
147
+ // Free a memory block.
148
+ // \param pointer to the memory block. Null pointer is permitted.
149
+ static void Free(void *ptr);
150
+ };
151
+ ~~~
152
+
153
+ Note that `Malloc()` and `Realloc()` are member functions but `Free()` is a static member function.
154
+
155
+ ## MemoryPoolAllocator {#MemoryPoolAllocator}
156
+
157
+ `MemoryPoolAllocator` is the default allocator for DOM. It allocates but does not free memory. This is suitable for building a DOM tree.
158
+
159
+ Internally, it allocates chunks of memory from the base allocator (by default `CrtAllocator`) and stores the chunks as a singly linked list. When the user requests an allocation, it allocates memory in the following order:
160
+
161
+ 1. User supplied buffer if it is available. (See [User Buffer section in DOM](doc/dom.md))
162
+ 2. If user supplied buffer is full, use the current memory chunk.
163
+ 3. If the current block is full, allocate a new block of memory.
164
+
165
+ # Parsing Optimization {#ParsingOptimization}
166
+
167
+ ## Skip Whitespaces with SIMD {#SkipwhitespaceWithSIMD}
168
+
169
+ When parsing JSON from a stream, the parser needs to skip 4 whitespace characters:
170
+
171
+ 1. Space (`U+0020`)
172
+ 2. Character Tabulation (`U+0009`)
173
+ 3. Line Feed (`U+000A`)
174
+ 4. Carriage Return (`U+000D`)
175
+
176
+ A simple implementation would be:
177
+ ~~~cpp
178
+ void SkipWhitespace(InputStream& s) {
179
+ while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
180
+ s.Take();
181
+ }
182
+ ~~~
183
+
184
+ However, this requires 4 comparisons and a few branches for each character. This was found to be a hot spot.
185
+
186
+ To accelerate this process, SIMD was applied to compare 16 characters with 4 white spaces for each iteration. Currently RapidJSON supports SSE2, SSE4.2 and ARM Neon instructions for this. And it is only activated for UTF-8 memory streams, including string stream or *in situ* parsing.
187
+
188
+ To enable this optimization, you need to define `RAPIDJSON_SSE2`, `RAPIDJSON_SSE42` or `RAPIDJSON_NEON` before including `rapidjson.h`. Some compilers can detect the setting, as in `perftest.h`:
189
+
190
+ ~~~cpp
191
+ // __SSE2__ and __SSE4_2__ are recognized by gcc, clang, and the Intel compiler.
192
+ // We use -march=native with gmake to enable -msse2 and -msse4.2, if supported.
193
+ // Likewise, __ARM_NEON is used to detect Neon.
194
+ #if defined(__SSE4_2__)
195
+ # define RAPIDJSON_SSE42
196
+ #elif defined(__SSE2__)
197
+ # define RAPIDJSON_SSE2
198
+ #elif defined(__ARM_NEON)
199
+ # define RAPIDJSON_NEON
200
+ #endif
201
+ ~~~
202
+
203
+ Note that, these are compile-time settings. Running the executable on a machine without such instruction set support will make it crash.
204
+
205
+ ### Page boundary issue
206
+
207
+ In an early version of RapidJSON, [an issue](https://code.google.com/archive/p/rapidjson/issues/104) reported that `SkipWhitespace_SIMD()` crashed very rarely (around 1 in 500,000). After investigation, it was suspected that `_mm_loadu_si128()` accessed bytes after `'\0'`, across a protected page boundary.
208
+
209
+ In [Intel® 64 and IA-32 Architectures Optimization Reference Manual
210
+ ](http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html), section 10.2.1:
211
+
212
+ > To support algorithms requiring unaligned 128-bit SIMD memory accesses, memory buffer allocation by a caller function should consider adding some pad space so that a callee function can safely use the address pointer safely with unaligned 128-bit SIMD memory operations.
213
+ > The minimal padding size should be the width of the SIMD register that might be used in conjunction with unaligned SIMD memory access.
214
+
215
+ This is not feasible as RapidJSON should not enforce such requirement.
216
+
217
+ To fix this issue, the routine currently processes bytes up to the next aligned address. After that, it uses aligned reads to perform SIMD processing. Also see [#85](https://github.com/Tencent/rapidjson/issues/85).
218
+
219
+ ## Local Stream Copy {#LocalStreamCopy}
220
+
221
+ During optimization, it was found that some compilers cannot localize some member data accesses of streams into local variables or registers. Experimental results show that for some stream types, making a copy of the stream and using it in the inner loop can improve performance. For example, the actual (non-SIMD) implementation of `SkipWhitespace()` is:
222
+
223
+ ~~~cpp
224
+ template<typename InputStream>
225
+ void SkipWhitespace(InputStream& is) {
226
+ internal::StreamLocalCopy<InputStream> copy(is);
227
+ InputStream& s(copy.s);
228
+
229
+ while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
230
+ s.Take();
231
+ }
232
+ ~~~
233
+
234
+ Depending on the traits of stream, `StreamLocalCopy` will make (or not make) a copy of the stream object, use it locally and copy the states of stream back to the original stream.
235
+
236
+ ## Parsing to Double {#ParsingDouble}
237
+
238
+ Parsing a string into a `double` is difficult. The standard library function `strtod()` can do the job but it is slow. By default, the parsers use the normal precision setting. This has a maximum error of 3 [ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) and is implemented in `internal::StrtodNormalPrecision()`.
239
+
240
+ When using `kParseFullPrecisionFlag`, the parsers call `internal::StrtodFullPrecision()` instead, and this function actually implements 3 versions of conversion methods.
241
+ 1. [Fast-Path](http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/).
242
+ 2. Custom DIY-FP implementation as in [double-conversion](https://github.com/floitsch/double-conversion).
243
+ 3. Big Integer Method as in (Clinger, William D. How to read floating point numbers accurately. Vol. 25. No. 6. ACM, 1990).
244
+
245
+ If the first conversion method fails, it will try the second, and so on.
246
+
247
+ # Generation Optimization {#GenerationOptimization}
248
+
249
+ ## Integer-to-String conversion {#itoa}
250
+
251
+ The naive algorithm for integer-to-string conversion involves division per each decimal digit. We have implemented various implementations and evaluated them in [itoa-benchmark](https://github.com/miloyip/itoa-benchmark).
252
+
253
+ Although the SSE2 version is the fastest, the difference is minor compared to the runner-up `branchlut`. And `branchlut` is a pure C++ implementation, so we adopt `branchlut` in RapidJSON.
254
+
255
+ ## Double-to-String conversion {#dtoa}
256
+
257
+ Originally RapidJSON used `snprintf(..., ..., "%g")` to achieve double-to-string conversion. This is not accurate as the default precision is 6. Later we also found that this is slow and there is an alternative.
258
+
259
+ Google's V8 [double-conversion](https://github.com/floitsch/double-conversion
260
+ ) implemented a newer, fast algorithm called Grisu3 (Loitsch, Florian. "Printing floating-point numbers quickly and accurately with integers." ACM Sigplan Notices 45.6 (2010): 233-243.).
261
+
262
+ However, since it is not header-only, we implemented a header-only version of Grisu2. This algorithm guarantees that the result is always accurate. And in most cases it produces the shortest (optimal) string representation.
263
+
264
+ The header-only conversion function has been evaluated in [dtoa-benchmark](https://github.com/miloyip/dtoa-benchmark).
265
+
266
+ # Parser {#Parser}
267
+
268
+ ## Iterative Parser {#IterativeParser}
269
+
270
+ The iterative parser is a recursive descent LL(1) parser
271
+ implemented in a non-recursive manner.
272
+
273
+ ### Grammar {#IterativeParserGrammar}
274
+
275
+ The grammar used for this parser is based on strict JSON syntax:
276
+ ~~~~~~~~~~
277
+ S -> array | object
278
+ array -> [ values ]
279
+ object -> { members }
280
+ values -> non-empty-values | ε
281
+ non-empty-values -> value addition-values
282
+ addition-values -> ε | , non-empty-values
283
+ members -> non-empty-members | ε
284
+ non-empty-members -> member addition-members
285
+ addition-members -> ε | , non-empty-members
286
+ member -> STRING : value
287
+ value -> STRING | NUMBER | NULL | BOOLEAN | object | array
288
+ ~~~~~~~~~~
289
+
290
+ Note that left factoring is applied to non-terminals `values` and `members`
291
+ to make the grammar be LL(1).
292
+
293
+ ### Parsing Table {#IterativeParserParsingTable}
294
+
295
+ Based on the grammar, we can construct the FIRST and FOLLOW set.
296
+
297
+ The FIRST set of non-terminals is listed below:
298
+
299
+ | NON-TERMINAL | FIRST |
300
+ |:-----------------:|:--------------------------------:|
301
+ | array | [ |
302
+ | object | { |
303
+ | values | ε STRING NUMBER NULL BOOLEAN { [ |
304
+ | addition-values | ε COMMA |
305
+ | members | ε STRING |
306
+ | addition-members | ε COMMA |
307
+ | member | STRING |
308
+ | value | STRING NUMBER NULL BOOLEAN { [ |
309
+ | S | [ { |
310
+ | non-empty-members | STRING |
311
+ | non-empty-values | STRING NUMBER NULL BOOLEAN { [ |
312
+
313
+ The FOLLOW set is listed below:
314
+
315
+ | NON-TERMINAL | FOLLOW |
316
+ |:-----------------:|:-------:|
317
+ | S | $ |
318
+ | array | , $ } ] |
319
+ | object | , $ } ] |
320
+ | values | ] |
321
+ | non-empty-values | ] |
322
+ | addition-values | ] |
323
+ | members | } |
324
+ | non-empty-members | } |
325
+ | addition-members | } |
326
+ | member | , } |
327
+ | value | , } ] |
328
+
329
+ Finally the parsing table can be constructed from FIRST and FOLLOW set:
330
+
331
+ | NON-TERMINAL | [ | { | , | : | ] | } | STRING | NUMBER | NULL | BOOLEAN |
332
+ |:-----------------:|:---------------------:|:---------------------:|:-------------------:|:-:|:-:|:-:|:-----------------------:|:---------------------:|:---------------------:|:---------------------:|
333
+ | S | array | object | | | | | | | | |
334
+ | array | [ values ] | | | | | | | | | |
335
+ | object | | { members } | | | | | | | | |
336
+ | values | non-empty-values | non-empty-values | | | ε | | non-empty-values | non-empty-values | non-empty-values | non-empty-values |
337
+ | non-empty-values | value addition-values | value addition-values | | | | | value addition-values | value addition-values | value addition-values | value addition-values |
338
+ | addition-values | | | , non-empty-values | | ε | | | | | |
339
+ | members | | | | | | ε | non-empty-members | | | |
340
+ | non-empty-members | | | | | | | member addition-members | | | |
341
+ | addition-members | | | , non-empty-members | | | ε | | | | |
342
+ | member | | | | | | | STRING : value | | | |
343
+ | value | array | object | | | | | STRING | NUMBER | NULL | BOOLEAN |
344
+
345
+ There is a great [tool](http://hackingoff.com/compilers/predict-first-follow-set) for above grammar analysis.
346
+
347
+ ### Implementation {#IterativeParserImplementation}
348
+
349
+ Based on the parsing table, a direct(or conventional) implementation
350
+ that pushes the production body in reverse order
351
+ while generating a production could work.
352
+
353
+ In RapidJSON, several modifications(or adaptations to current design) are made to a direct implementation.
354
+
355
+ First, the parsing table is encoded in a state machine in RapidJSON.
356
+ States are constructed by the head and body of production.
357
+ State transitions are constructed by production rules.
358
+ Besides, extra states are added for productions involved with `array` and `object`.
359
+ In this way the generation of array values or object members would be a single state transition,
360
+ rather than several pop/push operations in the direct implementation.
361
+ This also makes the estimation of stack size easier.
362
+
363
+ The state diagram is shown as follows:
364
+
365
+ ![State Diagram](diagram/iterative-parser-states-diagram.png)
366
+
367
+ Second, the iterative parser also keeps track of array's value count and object's member count
368
+ in its internal stack, which may be different from a conventional implementation.
@@ -0,0 +1,363 @@
1
+ # 内部架构
2
+
3
+ 本部分记录了一些设计和实现细节。
4
+
5
+ [TOC]
6
+
7
+ # 架构 {#Architecture}
8
+
9
+ ## SAX 和 DOM
10
+
11
+ 下面的 UML 图显示了 SAX 和 DOM 的基本关系。
12
+
13
+ ![架构 UML 类图](diagram/architecture.png)
14
+
15
+ 关系的核心是 `Handler` 概念。在 SAX 一边,`Reader` 从流解析 JSON 并将事件发送到 `Handler`。`Writer` 实现了 `Handler` 概念,用于处理相同的事件。在 DOM 一边,`Document` 实现了 `Handler` 概念,用于通过这些事件来构建 DOM。`Value` 支持了 `Value::Accept(Handler&)` 函数,它可以将 DOM 转换为事件进行发送。
16
+
17
+ 在这个设计,SAX 是不依赖于 DOM 的。甚至 `Reader` 和 `Writer` 之间也没有依赖。这提供了连接事件发送器和处理器的灵活性。除此之外,`Value` 也是不依赖于 SAX 的。所以,除了将 DOM 序列化为 JSON 之外,用户也可以将其序列化为 XML,或者做任何其他事情。
18
+
19
+ ## 工具类
20
+
21
+ SAX 和 DOM API 都依赖于3个额外的概念:`Allocator`、`Encoding` 和 `Stream`。它们的继承层次结构如下图所示。
22
+
23
+ ![工具类 UML 类图](diagram/utilityclass.png)
24
+
25
+ # 值(Value) {#Value}
26
+
27
+ `Value` (实际上被定义为 `GenericValue<UTF8<>>`)是 DOM API 的核心。本部分描述了它的设计。
28
+
29
+ ## 数据布局 {#DataLayout}
30
+
31
+ `Value` 是[可变类型](http://en.wikipedia.org/wiki/Variant_type)。在 RapidJSON 的上下文中,一个 `Value` 的实例可以包含6种 JSON 数据类型之一。通过使用 `union` ,这是可能实现的。每一个 `Value` 包含两个成员:`union Data data_` 和 `unsigned flags_`。`flags_` 表明了 JSON 类型,以及附加的信息。
32
+
33
+ 下表显示了所有类型的数据布局。32位/64位列表明了字段所占用的字节数。
34
+
35
+ | Null | | 32位 | 64位 |
36
+ |-------------------|----------------------------------|:----:|:----:|
37
+ | (未使用) | |4 |8 |
38
+ | (未使用) | |4 |4 |
39
+ | (未使用) | |4 |4 |
40
+ | `unsigned flags_` | `kNullType kNullFlag` |4 |4 |
41
+
42
+ | Bool | | 32位 | 64位 |
43
+ |-------------------|----------------------------------------------------|:----:|:----:|
44
+ | (未使用) | |4 |8 |
45
+ | (未使用) | |4 |4 |
46
+ | (未使用) | |4 |4 |
47
+ | `unsigned flags_` | `kBoolType` (either `kTrueFlag` or `kFalseFlag`) |4 |4 |
48
+
49
+ | String | | 32位 | 64位 |
50
+ |---------------------|-------------------------------------|:----:|:----:|
51
+ | `Ch* str` | 指向字符串的指针(可能拥有所有权) |4 |8 |
52
+ | `SizeType length` | 字符串长度 |4 |4 |
53
+ | (未使用) | |4 |4 |
54
+ | `unsigned flags_` | `kStringType kStringFlag ...` |4 |4 |
55
+
56
+ | Object | | 32位 | 64位 |
57
+ |---------------------|-------------------------------------|:----:|:----:|
58
+ | `Member* members` | 指向成员数组的指针(拥有所有权) |4 |8 |
59
+ | `SizeType size` | 成员数量 |4 |4 |
60
+ | `SizeType capacity` | 成员容量 |4 |4 |
61
+ | `unsigned flags_` | `kObjectType kObjectFlag` |4 |4 |
62
+
63
+ | Array | | 32位 | 64位 |
64
+ |---------------------|-------------------------------------|:----:|:----:|
65
+ | `Value* values` | 指向值数组的指针(拥有所有权) |4 |8 |
66
+ | `SizeType size` | 值数量 |4 |4 |
67
+ | `SizeType capacity` | 值容量 |4 |4 |
68
+ | `unsigned flags_` | `kArrayType kArrayFlag` |4 |4 |
69
+
70
+ | Number (Int) | | 32位 | 64位 |
71
+ |---------------------|-------------------------------------|:----:|:----:|
72
+ | `int i` | 32位有符号整数 |4 |4 |
73
+ | (零填充) | 0 |4 |4 |
74
+ | (未使用) | |4 |8 |
75
+ | `unsigned flags_` | `kNumberType kNumberFlag kIntFlag kInt64Flag ...` |4 |4 |
76
+
77
+ | Number (UInt) | | 32位 | 64位 |
78
+ |---------------------|-------------------------------------|:----:|:----:|
79
+ | `unsigned u` | 32位无符号整数 |4 |4 |
80
+ | (零填充) | 0 |4 |4 |
81
+ | (未使用) | |4 |8 |
82
+ | `unsigned flags_` | `kNumberType kNumberFlag kUIntFlag kUInt64Flag ...` |4 |4 |
83
+
84
+ | Number (Int64) | | 32位 | 64位 |
85
+ |---------------------|-------------------------------------|:----:|:----:|
86
+ | `int64_t i64` | 64位有符号整数 |8 |8 |
87
+ | (未使用) | |4 |8 |
88
+ | `unsigned flags_` | `kNumberType kNumberFlag kInt64Flag ...` |4 |4 |
89
+
90
+ | Number (Uint64) | | 32位 | 64位 |
91
+ |---------------------|-------------------------------------|:----:|:----:|
92
+ | `uint64_t u64` | 64位无符号整数 |8 |8 |
93
+ | (未使用) | |4 |8 |
94
+ | `unsigned flags_` | `kNumberType kNumberFlag kUint64Flag ...` |4 |4 |
95
+
96
+ | Number (Double) | | 32位 | 64位 |
97
+ |---------------------|-------------------------------------|:----:|:----:|
98
+ | `double d` | 双精度浮点数 |8 |8 |
99
+ | (未使用) | |4 |8 |
100
+ | `unsigned flags_` |`kNumberType kNumberFlag kDoubleFlag`|4 |4 |
101
+
102
+ 这里有一些需要注意的地方:
103
+ * 为了减少在64位架构上的内存消耗,`SizeType` 被定义为 `unsigned` 而不是 `size_t`。
104
+ * 32位整数的零填充可能被放在实际类型的前面或后面,这依赖于字节序。这使得它可以将32位整数不经过任何转换就可以解释为64位整数。
105
+ * `Int` 永远是 `Int64`,反之不然。
106
+
107
+ ## 标志 {#Flags}
108
+
109
+ 32位的 `flags_` 包含了 JSON 类型和其他信息。如前文中的表所述,每一种 JSON 类型包含了冗余的 `kXXXType` 和 `kXXXFlag`。这个设计是为了优化测试位标志(`IsNumber()`)和获取每一种类型的序列号(`GetType()`)。
110
+
111
+ 字符串有两个可选的标志。`kCopyFlag` 表明这个字符串拥有字符串拷贝的所有权。而 `kInlineStrFlag` 意味着使用了[短字符串优化](#ShortString)。
112
+
113
+ 数字更加复杂一些。对于普通的整数值,它可以包含 `kIntFlag`、`kUintFlag`、 `kInt64Flag` 和/或 `kUint64Flag`,这由整数的范围决定。带有小数或者超过64位所能表达的范围的整数的数字会被存储为带有 `kDoubleFlag` 的 `double`。
114
+
115
+ ## 短字符串优化 {#ShortString}
116
+
117
+ [Kosta](https://github.com/Kosta-Github) 提供了很棒的短字符串优化。这个优化的原理如下所述。除去 `flags_` ,`Value` 有12或16字节(对于32位或64位)来存储实际的数据。这为在其内部直接存储短字符串而不是存储字符串的指针创造了可能。对于1字节的字符类型(例如 `char`),它可以在 `Value` 类型内部存储至多11或15个字符的字符串。
118
+
119
+ |ShortString (Ch=char)| | 32位 | 64位 |
120
+ |---------------------|-------------------------------------|:----:|:----:|
121
+ | `Ch str[MaxChars]` | 字符串缓冲区 |11 |15 |
122
+ | `Ch invLength` | MaxChars - Length |1 |1 |
123
+ | `unsigned flags_` | `kStringType kStringFlag ...` |4 |4 |
124
+
125
+ 这里使用了一项特殊的技术。它存储了 (MaxChars - length) 而不直接存储字符串的长度。这使得存储11个字符并且带有后缀 `\0` 成为可能。
126
+
127
+ 这个优化可以减少字符串拷贝内存占用。它也改善了缓存一致性,并进一步提高了运行时性能。
128
+
129
+ # 分配器(Allocator) {#InternalAllocator}
130
+
131
+ `Allocator` 是 RapidJSON 中的概念:
132
+ ~~~cpp
133
+ concept Allocator {
134
+ static const bool kNeedFree; //!< 表明这个分配器是否需要调用 Free()。
135
+
136
+ // 申请内存块。
137
+ // \param size 内存块的大小,以字节记。
138
+ // \returns 指向内存块的指针。
139
+ void* Malloc(size_t size);
140
+
141
+ // 调整内存块的大小。
142
+ // \param originalPtr 当前内存块的指针。空指针是被允许的。
143
+ // \param originalSize 当前大小,以字节记。(设计问题:因为有些分配器可能不会记录它,显式地传递它可以节约内存。)
144
+ // \param newSize 新大小,以字节记。
145
+ void* Realloc(void* originalPtr, size_t originalSize, size_t newSize);
146
+
147
+ // 释放内存块。
148
+ // \param ptr 指向内存块的指针。空指针是被允许的。
149
+ static void Free(void *ptr);
150
+ };
151
+ ~~~
152
+
153
+ 需要注意的是 `Malloc()` 和 `Realloc()` 是成员函数而 `Free()` 是静态成员函数。
154
+
155
+ ## MemoryPoolAllocator {#MemoryPoolAllocator}
156
+
157
+ `MemoryPoolAllocator` 是 DOM 的默认内存分配器。它只申请内存而不释放内存。这对于构建 DOM 树非常合适。
158
+
159
+ 在它的内部,它从基础的内存分配器申请内存块(默认为 `CrtAllocator`)并将这些内存块存储为单向链表。当用户请求申请内存,它会遵循下列步骤来申请内存:
160
+
161
+ 1. 如果可用,使用用户提供的缓冲区。(见 [User Buffer section in DOM](doc/dom.md))
162
+ 2. 如果用户提供的缓冲区已满,使用当前内存块。
163
+ 3. 如果当前内存块已满,申请新的内存块。
164
+
165
+ # 解析优化 {#ParsingOptimization}
166
+
167
+ ## 使用 SIMD 跳过空格 {#SkipwhitespaceWithSIMD}
168
+
169
+ 当从流中解析 JSON 时,解析器需要跳过4种空格字符:
170
+
171
+ 1. 空格 (`U+0020`)
172
+ 2. 制表符 (`U+0009`)
173
+ 3. 换行 (`U+000A`)
174
+ 4. 回车 (`U+000D`)
175
+
176
+ 这是一份简单的实现:
177
+ ~~~cpp
178
+ void SkipWhitespace(InputStream& s) {
179
+ while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
180
+ s.Take();
181
+ }
182
+ ~~~
183
+
184
+ 但是,这需要对每个字符进行4次比较以及一些分支。这被发现是一个热点。
185
+
186
+ 为了加速这一处理,RapidJSON 使用 SIMD 来在一次迭代中比较16个字符和4个空格。目前 RapidJSON 支持 SSE2 , SSE4.2 和 ARM Neon 指令。同时它也只会对 UTF-8 内存流启用,包括字符串流或 *原位* 解析。
187
+
188
+ 你可以通过在包含 `rapidjson.h` 之前定义 `RAPIDJSON_SSE2` , `RAPIDJSON_SSE42` 或 `RAPIDJSON_NEON` 来启用这个优化。一些编译器可以检测这个设置,如 `perftest.h`:
189
+
190
+ ~~~cpp
191
+ // __SSE2__ 和 __SSE4_2__ 可被 gcc、clang 和 Intel 编译器识别:
192
+ // 如果支持的话,我们在 gmake 中使用了 -march=native 来启用 -msse2 和 -msse4.2
193
+ // 同样的, __ARM_NEON 被用于识别Neon
194
+ #if defined(__SSE4_2__)
195
+ # define RAPIDJSON_SSE42
196
+ #elif defined(__SSE2__)
197
+ # define RAPIDJSON_SSE2
198
+ #elif defined(__ARM_NEON)
199
+ # define RAPIDJSON_NEON
200
+ #endif
201
+ ~~~
202
+
203
+ 需要注意的是,这是编译期的设置。在不支持这些指令的机器上运行可执行文件会使它崩溃。
204
+
205
+ ### 页面对齐问题
206
+
207
+ 在 RapidJSON 的早期版本中,被报告了[一个问题](https://code.google.com/archive/p/rapidjson/issues/104):`SkipWhitespace_SIMD()` 会罕见地导致崩溃(约五十万分之一的几率)。在调查之后,怀疑是 `_mm_loadu_si128()` 访问了 `'\0'` 之后的内存,并越过被保护的页面边界。
208
+
209
+ 在 [Intel® 64 and IA-32 Architectures Optimization Reference Manual
210
+ ](http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html) 中,章节 10.2.1:
211
+
212
+ > 为了支持需要非对齐的128位 SIMD 内存访问的算法,调用者的内存缓冲区申请应当考虑添加一些填充空间,这样被调用的函数可以安全地将地址指针用于未对齐的128位 SIMD 内存操作。
213
+ > 在结合非对齐的 SIMD 内存操作中,最小的填充大小应该等于 SIMD 寄存器的大小。
214
+
215
+ 对于 RapidJSON 来说,这显然是不可行的,因为 RapidJSON 不应当强迫用户进行内存对齐。
216
+
217
+ 为了修复这个问题,当前的代码会先按字节处理直到下一个对齐的地址。在这之后,使用对齐读取来进行 SIMD 处理。见 [#85](https://github.com/Tencent/rapidjson/issues/85)。
218
+
219
+ ## 局部流拷贝 {#LocalStreamCopy}
220
+
221
+ 在优化的过程中,我们发现一些编译器不能将访问流的一些成员数据放入局部变量或者寄存器中。测试结果显示,对于一些流类型,创建流的拷贝并将其用于内层循环中可以改善性能。例如,实际(非 SIMD)的 `SkipWhitespace()` 被实现为:
222
+
223
+ ~~~cpp
224
+ template<typename InputStream>
225
+ void SkipWhitespace(InputStream& is) {
226
+ internal::StreamLocalCopy<InputStream> copy(is);
227
+ InputStream& s(copy.s);
228
+
229
+ while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
230
+ s.Take();
231
+ }
232
+ ~~~
233
+
234
+ 基于流的特征,`StreamLocalCopy` 会创建(或不创建)流对象的拷贝,在局部使用它并将流的状态拷贝回原来的流。
235
+
236
+ ## 解析为双精度浮点数 {#ParsingDouble}
237
+
238
+ 将字符串解析为 `double` 并不简单。标准库函数 `strtod()` 可以胜任这项工作,但它比较缓慢。默认情况下,解析器使用默认的精度设置。这最多有 3[ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) 的误差,并实现在 `internal::StrtodNormalPrecision()` 中。
239
+
240
+ 当使用 `kParseFullPrecisionFlag` 时,解析器会改为调用 `internal::StrtodFullPrecision()` ,这个函数会自动调用三个版本的转换。
241
+ 1. [Fast-Path](http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/)。
242
+ 2. [double-conversion](https://github.com/floitsch/double-conversion) 中的自定义 DIY-FP 实现。
243
+ 3. (Clinger, William D. How to read floating point numbers accurately. Vol. 25. No. 6. ACM, 1990) 中的大整数算法。
244
+
245
+ 如果第一个转换方法失败,则尝试使用第二种方法,以此类推。
246
+
247
+ # 生成优化 {#GenerationOptimization}
248
+
249
+ ## 整数到字符串的转换 {#itoa}
250
+
251
+ 整数到字符串转换的朴素算法需要对每一个十进制位进行一次除法。我们实现了若干版本并在 [itoa-benchmark](https://github.com/miloyip/itoa-benchmark) 中对它们进行了评估。
252
+
253
+ 虽然 SSE2 版本是最快的,但它和第二快的 `branchlut` 差距不大。而且 `branchlut` 是纯C++实现,所以我们在 RapidJSON 中使用了 `branchlut`。
254
+
255
+ ## 双精度浮点数到字符串的转换 {#dtoa}
256
+
257
+ 原来 RapidJSON 使用 `snprintf(..., ..., "%g")` 来进行双精度浮点数到字符串的转换。这是不准确的,因为默认的精度是6。随后我们发现它很缓慢,而且有其它的替代品。
258
+
259
+ Google 的 V8 [double-conversion](https://github.com/floitsch/double-conversion
260
+ ) 实现了更新的、快速的被称为 Grisu3 的算法(Loitsch, Florian. "Printing floating-point numbers quickly and accurately with integers." ACM Sigplan Notices 45.6 (2010): 233-243.)。
261
+
262
+ 然而,这个实现不是仅头文件的,所以我们实现了一个仅头文件的 Grisu2 版本。这个算法保证了结果永远精确。而且在大多数情况下,它会生成最短的(最优)字符串表示。
263
+
264
+ 这个仅头文件的转换函数在 [dtoa-benchmark](https://github.com/miloyip/dtoa-benchmark) 中进行评估。
265
+
266
+ # 解析器 {#Parser}
267
+
268
+ ## 迭代解析 {#IterativeParser}
269
+
270
+ 迭代解析器是一个以非递归方式实现的递归下降的 LL(1) 解析器。
271
+
272
+ ### 语法 {#IterativeParserGrammar}
273
+
274
+ 解析器使用的语法是基于严格 JSON 语法的:
275
+ ~~~~~~~~~~
276
+ S -> array | object
277
+ array -> [ values ]
278
+ object -> { members }
279
+ values -> non-empty-values | ε
280
+ non-empty-values -> value addition-values
281
+ addition-values -> ε | , non-empty-values
282
+ members -> non-empty-members | ε
283
+ non-empty-members -> member addition-members
284
+ addition-members -> ε | , non-empty-members
285
+ member -> STRING : value
286
+ value -> STRING | NUMBER | NULL | BOOLEAN | object | array
287
+ ~~~~~~~~~~
288
+
289
+ 注意到左因子被加入了非终结符的 `values` 和 `members` 来保证语法是 LL(1) 的。
290
+
291
+ ### 解析表 {#IterativeParserParsingTable}
292
+
293
+ 基于这份语法,我们可以构造 FIRST 和 FOLLOW 集合。
294
+
295
+ 非终结符的 FIRST 集合如下所示:
296
+
297
+ | NON-TERMINAL | FIRST |
298
+ |:-----------------:|:--------------------------------:|
299
+ | array | [ |
300
+ | object | { |
301
+ | values | ε STRING NUMBER NULL BOOLEAN { [ |
302
+ | addition-values | ε COMMA |
303
+ | members | ε STRING |
304
+ | addition-members | ε COMMA |
305
+ | member | STRING |
306
+ | value | STRING NUMBER NULL BOOLEAN { [ |
307
+ | S | [ { |
308
+ | non-empty-members | STRING |
309
+ | non-empty-values | STRING NUMBER NULL BOOLEAN { [ |
310
+
311
+ FOLLOW 集合如下所示:
312
+
313
+ | NON-TERMINAL | FOLLOW |
314
+ |:-----------------:|:-------:|
315
+ | S | $ |
316
+ | array | , $ } ] |
317
+ | object | , $ } ] |
318
+ | values | ] |
319
+ | non-empty-values | ] |
320
+ | addition-values | ] |
321
+ | members | } |
322
+ | non-empty-members | } |
323
+ | addition-members | } |
324
+ | member | , } |
325
+ | value | , } ] |
326
+
327
+ 最终可以从 FIRST 和 FOLLOW 集合生成解析表:
328
+
329
+ | NON-TERMINAL | [ | { | , | : | ] | } | STRING | NUMBER | NULL | BOOLEAN |
330
+ |:-----------------:|:---------------------:|:---------------------:|:-------------------:|:-:|:-:|:-:|:-----------------------:|:---------------------:|:---------------------:|:---------------------:|
331
+ | S | array | object | | | | | | | | |
332
+ | array | [ values ] | | | | | | | | | |
333
+ | object | | { members } | | | | | | | | |
334
+ | values | non-empty-values | non-empty-values | | | ε | | non-empty-values | non-empty-values | non-empty-values | non-empty-values |
335
+ | non-empty-values | value addition-values | value addition-values | | | | | value addition-values | value addition-values | value addition-values | value addition-values |
336
+ | addition-values | | | , non-empty-values | | ε | | | | | |
337
+ | members | | | | | | ε | non-empty-members | | | |
338
+ | non-empty-members | | | | | | | member addition-members | | | |
339
+ | addition-members | | | , non-empty-members | | | ε | | | | |
340
+ | member | | | | | | | STRING : value | | | |
341
+ | value | array | object | | | | | STRING | NUMBER | NULL | BOOLEAN |
342
+
343
+ 对于上面的语法分析,这里有一个很棒的[工具](http://hackingoff.com/compilers/predict-first-follow-set)。
344
+
345
+ ### 实现 {#IterativeParserImplementation}
346
+
347
+ 基于这份解析表,一个直接的(常规的)将规则反向入栈的实现可以正常工作。
348
+
349
+ 在 RapidJSON 中,对直接的实现进行了一些修改:
350
+
351
+ 首先,在 RapidJSON 中,这份解析表被编码为状态机。
352
+ 状态由规则的头部和主体构造。
353
+ 状态转换由规则构造。
354
+ 除此之外,额外的状态被添加到与 `array` 和 `object` 有关的规则。
355
+ 通过这种方式,生成数组值或对象成员可以只用一次状态转移便可完成,
356
+ 而不需要在直接的实现中的多次出栈/入栈操作。
357
+ 这也使得估计栈的大小更加容易。
358
+
359
+ 状态图如下所示:
360
+
361
+ ![状态图](diagram/iterative-parser-states-diagram.png)
362
+
363
+ 第二,迭代解析器也在内部栈保存了数组的值个数和对象成员的数量,这也与传统的实现不同。