datahike-browser-tests 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.circleci/config.yml +405 -0
  2. package/.circleci/scripts/gen_ci.clj +194 -0
  3. package/.cirrus.yml +60 -0
  4. package/.clj-kondo/babashka/sci/config.edn +1 -0
  5. package/.clj-kondo/babashka/sci/sci/core.clj +9 -0
  6. package/.clj-kondo/config.edn +95 -0
  7. package/.dir-locals.el +2 -0
  8. package/.github/FUNDING.yml +3 -0
  9. package/.github/ISSUE_TEMPLATE/1-bug-report.yml +68 -0
  10. package/.github/ISSUE_TEMPLATE/2-feature-request.yml +28 -0
  11. package/.github/ISSUE_TEMPLATE/config.yml +6 -0
  12. package/.github/pull_request_template.md +24 -0
  13. package/.github/workflows/native-image.yml +84 -0
  14. package/LICENSE +203 -0
  15. package/README.md +273 -0
  16. package/bb/deps.edn +9 -0
  17. package/bb/resources/github-fingerprints +3 -0
  18. package/bb/resources/native-image-tests/run-bb-pod-tests.clj +162 -0
  19. package/bb/resources/native-image-tests/run-libdatahike-tests +12 -0
  20. package/bb/resources/native-image-tests/run-native-image-tests +74 -0
  21. package/bb/resources/native-image-tests/run-python-tests +22 -0
  22. package/bb/resources/native-image-tests/testconfig.attr-refs.edn +6 -0
  23. package/bb/resources/native-image-tests/testconfig.edn +5 -0
  24. package/bb/resources/template/.settings/org.eclipse.jdt.apt.core.prefs +2 -0
  25. package/bb/resources/template/.settings/org.eclipse.jdt.core.prefs +9 -0
  26. package/bb/resources/template/.settings/org.eclipse.m2e.core.prefs +4 -0
  27. package/bb/resources/template/pom.xml +22 -0
  28. package/bb/src/tools/build.clj +132 -0
  29. package/bb/src/tools/clj_kondo.clj +32 -0
  30. package/bb/src/tools/deploy.clj +26 -0
  31. package/bb/src/tools/examples.clj +19 -0
  32. package/bb/src/tools/npm.clj +100 -0
  33. package/bb/src/tools/python.clj +14 -0
  34. package/bb/src/tools/release.clj +94 -0
  35. package/bb/src/tools/test.clj +148 -0
  36. package/bb/src/tools/version.clj +47 -0
  37. package/bb.edn +269 -0
  38. package/benchmark/src/benchmark/cli.clj +195 -0
  39. package/benchmark/src/benchmark/compare.clj +157 -0
  40. package/benchmark/src/benchmark/config.clj +316 -0
  41. package/benchmark/src/benchmark/measure.clj +187 -0
  42. package/benchmark/src/benchmark/store.clj +190 -0
  43. package/benchmark/test/benchmark/measure_test.clj +156 -0
  44. package/build.clj +30 -0
  45. package/config.edn +49 -0
  46. package/deps.edn +138 -0
  47. package/dev/sandbox.clj +82 -0
  48. package/dev/sandbox.cljs +127 -0
  49. package/dev/sandbox_benchmarks.clj +27 -0
  50. package/dev/sandbox_client.clj +87 -0
  51. package/dev/sandbox_transact_bench.clj +109 -0
  52. package/dev/user.clj +79 -0
  53. package/doc/README.md +96 -0
  54. package/doc/adl/README.md +6 -0
  55. package/doc/adl/adr-000-adr.org +28 -0
  56. package/doc/adl/adr-001-attribute-references.org +15 -0
  57. package/doc/adl/adr-002-build-tooling.org +54 -0
  58. package/doc/adl/adr-003-db-meta-data.md +52 -0
  59. package/doc/adl/adr-004-github-flow.md +40 -0
  60. package/doc/adl/adr-XYZ-template.md +30 -0
  61. package/doc/adl/index.org +3 -0
  62. package/doc/assets/datahike-logo.svg +3 -0
  63. package/doc/assets/datahiking-invoice.org +85 -0
  64. package/doc/assets/hhtree2.png +0 -0
  65. package/doc/assets/network_topology.svg +624 -0
  66. package/doc/assets/perf.png +0 -0
  67. package/doc/assets/schema_mindmap.mm +132 -0
  68. package/doc/assets/schema_mindmap.svg +970 -0
  69. package/doc/assets/temporal_index.mm +74 -0
  70. package/doc/backend-development.md +78 -0
  71. package/doc/bb-pod.md +89 -0
  72. package/doc/benchmarking.md +360 -0
  73. package/doc/bindings/edn-conversion.md +383 -0
  74. package/doc/cli.md +162 -0
  75. package/doc/cljdoc.edn +27 -0
  76. package/doc/cljs-support.md +133 -0
  77. package/doc/config.md +406 -0
  78. package/doc/contributing.md +114 -0
  79. package/doc/datalog-vs-sql.md +210 -0
  80. package/doc/datomic_differences.md +109 -0
  81. package/doc/development/pull-api-ns.md +186 -0
  82. package/doc/development/pull-frame-state-diagram.jpg +0 -0
  83. package/doc/distributed.md +566 -0
  84. package/doc/entity_spec.md +92 -0
  85. package/doc/gc.md +273 -0
  86. package/doc/java-api.md +808 -0
  87. package/doc/javascript-api.md +421 -0
  88. package/doc/libdatahike.md +86 -0
  89. package/doc/logging_and_error_handling.md +43 -0
  90. package/doc/norms.md +66 -0
  91. package/doc/schema-migration.md +85 -0
  92. package/doc/schema.md +287 -0
  93. package/doc/storage-backends.md +363 -0
  94. package/doc/store-id-refactoring.md +596 -0
  95. package/doc/time_variance.md +325 -0
  96. package/doc/unstructured.md +167 -0
  97. package/doc/versioning.md +261 -0
  98. package/examples/basic/README.md +19 -0
  99. package/examples/basic/deps.edn +6 -0
  100. package/examples/basic/docker-compose.yml +13 -0
  101. package/examples/basic/src/examples/core.clj +60 -0
  102. package/examples/basic/src/examples/schema.clj +155 -0
  103. package/examples/basic/src/examples/store.clj +60 -0
  104. package/examples/basic/src/examples/time_travel.clj +185 -0
  105. package/examples/java/.settings/org.eclipse.core.resources.prefs +3 -0
  106. package/examples/java/.settings/org.eclipse.jdt.apt.core.prefs +2 -0
  107. package/examples/java/.settings/org.eclipse.jdt.core.prefs +9 -0
  108. package/examples/java/.settings/org.eclipse.m2e.core.prefs +4 -0
  109. package/examples/java/README.md +162 -0
  110. package/examples/java/pom.xml +62 -0
  111. package/examples/java/src/main/java/examples/QuickStart.java +115 -0
  112. package/examples/java/src/main/java/examples/SchemaExample.java +148 -0
  113. package/examples/java/src/main/java/examples/TimeTravelExample.java +121 -0
  114. package/flake.lock +27 -0
  115. package/flake.nix +27 -0
  116. package/http-server/datahike/http/middleware.clj +75 -0
  117. package/http-server/datahike/http/server.clj +269 -0
  118. package/java/src/datahike/java/Database.java +274 -0
  119. package/java/src/datahike/java/Datahike.java +281 -0
  120. package/java/src/datahike/java/DatahikeGeneratedTest.java +349 -0
  121. package/java/src/datahike/java/DatahikeTest.java +370 -0
  122. package/java/src/datahike/java/EDN.java +170 -0
  123. package/java/src/datahike/java/IEntity.java +11 -0
  124. package/java/src/datahike/java/Keywords.java +161 -0
  125. package/java/src/datahike/java/SchemaFlexibility.java +52 -0
  126. package/java/src/datahike/java/Util.java +219 -0
  127. package/karma.conf.js +19 -0
  128. package/libdatahike/compile-cpp +7 -0
  129. package/libdatahike/src/datahike/impl/LibDatahikeBase.java +203 -0
  130. package/libdatahike/src/datahike/impl/libdatahike.clj +59 -0
  131. package/libdatahike/src/test_cpp.cpp +61 -0
  132. package/npm-package/PUBLISHING.md +140 -0
  133. package/npm-package/README.md +226 -0
  134. package/npm-package/package.template.json +34 -0
  135. package/npm-package/test-isomorphic.ts +281 -0
  136. package/npm-package/test.js +557 -0
  137. package/npm-package/typescript-test.ts +70 -0
  138. package/package.json +16 -0
  139. package/pydatahike/README.md +569 -0
  140. package/pydatahike/pyproject.toml +91 -0
  141. package/pydatahike/setup.py +42 -0
  142. package/pydatahike/src/datahike/__init__.py +134 -0
  143. package/pydatahike/src/datahike/_native.py +250 -0
  144. package/pydatahike/src/datahike/_version.py +2 -0
  145. package/pydatahike/src/datahike/database.py +722 -0
  146. package/pydatahike/src/datahike/edn.py +311 -0
  147. package/pydatahike/src/datahike/py.typed +0 -0
  148. package/pydatahike/tests/conftest.py +17 -0
  149. package/pydatahike/tests/test_basic.py +170 -0
  150. package/pydatahike/tests/test_database.py +51 -0
  151. package/pydatahike/tests/test_edn_conversion.py +299 -0
  152. package/pydatahike/tests/test_query.py +99 -0
  153. package/pydatahike/tests/test_schema.py +55 -0
  154. package/resources/clj-kondo.exports/io.replikativ/datahike/config.edn +5 -0
  155. package/resources/example_server.edn +4 -0
  156. package/shadow-cljs.edn +56 -0
  157. package/src/data_readers.clj +7 -0
  158. package/src/datahike/api/impl.cljc +176 -0
  159. package/src/datahike/api/specification.cljc +633 -0
  160. package/src/datahike/api/types.cljc +261 -0
  161. package/src/datahike/api.cljc +41 -0
  162. package/src/datahike/array.cljc +99 -0
  163. package/src/datahike/cli.clj +166 -0
  164. package/src/datahike/cljs.cljs +6 -0
  165. package/src/datahike/codegen/cli.clj +406 -0
  166. package/src/datahike/codegen/clj_kondo.clj +291 -0
  167. package/src/datahike/codegen/java.clj +403 -0
  168. package/src/datahike/codegen/naming.cljc +33 -0
  169. package/src/datahike/codegen/native.clj +559 -0
  170. package/src/datahike/codegen/pod.clj +488 -0
  171. package/src/datahike/codegen/python.clj +838 -0
  172. package/src/datahike/codegen/report.clj +55 -0
  173. package/src/datahike/codegen/typescript.clj +262 -0
  174. package/src/datahike/codegen/validation.clj +145 -0
  175. package/src/datahike/config.cljc +294 -0
  176. package/src/datahike/connections.cljc +16 -0
  177. package/src/datahike/connector.cljc +265 -0
  178. package/src/datahike/constants.cljc +142 -0
  179. package/src/datahike/core.cljc +297 -0
  180. package/src/datahike/datom.cljc +459 -0
  181. package/src/datahike/db/interface.cljc +119 -0
  182. package/src/datahike/db/search.cljc +305 -0
  183. package/src/datahike/db/transaction.cljc +937 -0
  184. package/src/datahike/db/utils.cljc +338 -0
  185. package/src/datahike/db.cljc +956 -0
  186. package/src/datahike/experimental/unstructured.cljc +126 -0
  187. package/src/datahike/experimental/versioning.cljc +172 -0
  188. package/src/datahike/externs.js +31 -0
  189. package/src/datahike/gc.cljc +69 -0
  190. package/src/datahike/http/client.clj +188 -0
  191. package/src/datahike/http/writer.clj +79 -0
  192. package/src/datahike/impl/entity.cljc +218 -0
  193. package/src/datahike/index/interface.cljc +93 -0
  194. package/src/datahike/index/persistent_set.cljc +469 -0
  195. package/src/datahike/index/utils.cljc +44 -0
  196. package/src/datahike/index.cljc +32 -0
  197. package/src/datahike/js/api.cljs +172 -0
  198. package/src/datahike/js/api_macros.clj +22 -0
  199. package/src/datahike/js.cljs +163 -0
  200. package/src/datahike/json.cljc +209 -0
  201. package/src/datahike/lru.cljc +146 -0
  202. package/src/datahike/migrate.clj +39 -0
  203. package/src/datahike/norm/norm.clj +245 -0
  204. package/src/datahike/online_gc.cljc +252 -0
  205. package/src/datahike/pod.clj +155 -0
  206. package/src/datahike/pull_api.cljc +325 -0
  207. package/src/datahike/query.cljc +1945 -0
  208. package/src/datahike/query_stats.cljc +88 -0
  209. package/src/datahike/readers.cljc +62 -0
  210. package/src/datahike/remote.cljc +218 -0
  211. package/src/datahike/schema.cljc +228 -0
  212. package/src/datahike/schema_cache.cljc +42 -0
  213. package/src/datahike/spec.cljc +101 -0
  214. package/src/datahike/store.cljc +80 -0
  215. package/src/datahike/tools.cljc +308 -0
  216. package/src/datahike/transit.cljc +80 -0
  217. package/src/datahike/writer.cljc +239 -0
  218. package/src/datahike/writing.cljc +362 -0
  219. package/src/deps.cljs +1 -0
  220. package/src-hitchhiker-tree/datahike/index/hitchhiker_tree/insert.cljc +76 -0
  221. package/src-hitchhiker-tree/datahike/index/hitchhiker_tree/upsert.cljc +128 -0
  222. package/src-hitchhiker-tree/datahike/index/hitchhiker_tree.cljc +213 -0
  223. package/test/datahike/backward_compatibility_test/src/backward_test.clj +37 -0
  224. package/test/datahike/integration_test/config_record_file_test.clj +14 -0
  225. package/test/datahike/integration_test/config_record_test.clj +14 -0
  226. package/test/datahike/integration_test/depr_config_uri_test.clj +15 -0
  227. package/test/datahike/integration_test/return_map_test.clj +62 -0
  228. package/test/datahike/integration_test.cljc +67 -0
  229. package/test/datahike/norm/norm_test.clj +124 -0
  230. package/test/datahike/norm/resources/naming-and-sorting-test/001-a1-example.edn +5 -0
  231. package/test/datahike/norm/resources/naming-and-sorting-test/002-a2-example.edn +5 -0
  232. package/test/datahike/norm/resources/naming-and-sorting-test/003-tx-fn-test.edn +1 -0
  233. package/test/datahike/norm/resources/naming-and-sorting-test/004-tx-data-and-tx-fn-test.edn +5 -0
  234. package/test/datahike/norm/resources/naming-and-sorting-test/01-transact-basic-characters.edn +2 -0
  235. package/test/datahike/norm/resources/naming-and-sorting-test/02 add occupation.edn +5 -0
  236. package/test/datahike/norm/resources/naming-and-sorting-test/checksums.edn +12 -0
  237. package/test/datahike/norm/resources/simple-test/001-a1-example.edn +5 -0
  238. package/test/datahike/norm/resources/simple-test/002-a2-example.edn +5 -0
  239. package/test/datahike/norm/resources/simple-test/checksums.edn +4 -0
  240. package/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/001-a1-example.edn +5 -0
  241. package/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/002-a2-example.edn +5 -0
  242. package/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/003-tx-fn-test.edn +1 -0
  243. package/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/checksums.edn +6 -0
  244. package/test/datahike/norm/resources/tx-data-and-tx-fn-test/second/004-tx-data-and-tx-fn-test.edn +5 -0
  245. package/test/datahike/norm/resources/tx-data-and-tx-fn-test/second/checksums.edn +2 -0
  246. package/test/datahike/norm/resources/tx-fn-test/first/001-a1-example.edn +5 -0
  247. package/test/datahike/norm/resources/tx-fn-test/first/002-a2-example.edn +5 -0
  248. package/test/datahike/norm/resources/tx-fn-test/first/checksums.edn +4 -0
  249. package/test/datahike/norm/resources/tx-fn-test/second/003-tx-fn-test.edn +1 -0
  250. package/test/datahike/norm/resources/tx-fn-test/second/checksums.edn +2 -0
  251. package/test/datahike/test/api_test.cljc +895 -0
  252. package/test/datahike/test/array_test.cljc +40 -0
  253. package/test/datahike/test/attribute_refs/datoms_test.cljc +140 -0
  254. package/test/datahike/test/attribute_refs/db_test.cljc +42 -0
  255. package/test/datahike/test/attribute_refs/differences_test.cljc +515 -0
  256. package/test/datahike/test/attribute_refs/entity_test.cljc +89 -0
  257. package/test/datahike/test/attribute_refs/pull_api_test.cljc +320 -0
  258. package/test/datahike/test/attribute_refs/query_find_specs_test.cljc +59 -0
  259. package/test/datahike/test/attribute_refs/query_fns_test.cljc +130 -0
  260. package/test/datahike/test/attribute_refs/query_interop_test.cljc +47 -0
  261. package/test/datahike/test/attribute_refs/query_not_test.cljc +193 -0
  262. package/test/datahike/test/attribute_refs/query_or_test.cljc +137 -0
  263. package/test/datahike/test/attribute_refs/query_pull_test.cljc +156 -0
  264. package/test/datahike/test/attribute_refs/query_rules_test.cljc +176 -0
  265. package/test/datahike/test/attribute_refs/query_test.cljc +241 -0
  266. package/test/datahike/test/attribute_refs/temporal_search.cljc +22 -0
  267. package/test/datahike/test/attribute_refs/transact_test.cljc +220 -0
  268. package/test/datahike/test/attribute_refs/utils.cljc +128 -0
  269. package/test/datahike/test/cache_test.cljc +38 -0
  270. package/test/datahike/test/components_test.cljc +92 -0
  271. package/test/datahike/test/config_test.cljc +158 -0
  272. package/test/datahike/test/core_test.cljc +105 -0
  273. package/test/datahike/test/datom_test.cljc +44 -0
  274. package/test/datahike/test/db_test.cljc +54 -0
  275. package/test/datahike/test/entity_spec_test.cljc +159 -0
  276. package/test/datahike/test/entity_test.cljc +103 -0
  277. package/test/datahike/test/explode_test.cljc +143 -0
  278. package/test/datahike/test/filter_test.cljc +75 -0
  279. package/test/datahike/test/gc_test.cljc +159 -0
  280. package/test/datahike/test/http/server_test.clj +192 -0
  281. package/test/datahike/test/http/writer_test.clj +86 -0
  282. package/test/datahike/test/ident_test.cljc +32 -0
  283. package/test/datahike/test/index_test.cljc +345 -0
  284. package/test/datahike/test/insert.cljc +125 -0
  285. package/test/datahike/test/java_bindings_test.clj +6 -0
  286. package/test/datahike/test/listen_test.cljc +41 -0
  287. package/test/datahike/test/lookup_refs_test.cljc +266 -0
  288. package/test/datahike/test/lru_test.cljc +27 -0
  289. package/test/datahike/test/migrate_test.clj +297 -0
  290. package/test/datahike/test/model/core.cljc +376 -0
  291. package/test/datahike/test/model/invariant.cljc +142 -0
  292. package/test/datahike/test/model/rng.cljc +82 -0
  293. package/test/datahike/test/model_test.clj +217 -0
  294. package/test/datahike/test/nodejs_test.cljs +262 -0
  295. package/test/datahike/test/online_gc_test.cljc +475 -0
  296. package/test/datahike/test/pod_test.clj +369 -0
  297. package/test/datahike/test/pull_api_test.cljc +474 -0
  298. package/test/datahike/test/purge_test.cljc +144 -0
  299. package/test/datahike/test/query_aggregates_test.cljc +101 -0
  300. package/test/datahike/test/query_find_specs_test.cljc +52 -0
  301. package/test/datahike/test/query_fns_test.cljc +523 -0
  302. package/test/datahike/test/query_interop_test.cljc +47 -0
  303. package/test/datahike/test/query_not_test.cljc +189 -0
  304. package/test/datahike/test/query_or_test.cljc +158 -0
  305. package/test/datahike/test/query_pull_test.cljc +147 -0
  306. package/test/datahike/test/query_rules_test.cljc +248 -0
  307. package/test/datahike/test/query_stats_test.cljc +218 -0
  308. package/test/datahike/test/query_test.cljc +984 -0
  309. package/test/datahike/test/schema_test.cljc +424 -0
  310. package/test/datahike/test/specification_test.cljc +30 -0
  311. package/test/datahike/test/store_test.cljc +78 -0
  312. package/test/datahike/test/stress_test.cljc +57 -0
  313. package/test/datahike/test/time_variance_test.cljc +518 -0
  314. package/test/datahike/test/tools_test.clj +134 -0
  315. package/test/datahike/test/transact_test.cljc +518 -0
  316. package/test/datahike/test/tuples_test.cljc +564 -0
  317. package/test/datahike/test/unstructured_test.cljc +291 -0
  318. package/test/datahike/test/upsert_impl_test.cljc +205 -0
  319. package/test/datahike/test/upsert_test.cljc +363 -0
  320. package/test/datahike/test/utils.cljc +110 -0
  321. package/test/datahike/test/validation_test.cljc +48 -0
  322. package/test/datahike/test/versioning_test.cljc +56 -0
  323. package/test/datahike/test.cljc +66 -0
  324. package/tests.edn +24 -0
package/doc/gc.md ADDED
@@ -0,0 +1,273 @@
1
+ # Garbage Collection
2
+
3
+ Datahike uses persistent data structures that enable structural sharing—each update creates a new version efficiently by reusing unchanged parts. This allows [time-travel queries](./time_variance.md) and [git-like versioning](./versioning.md), but storage grows over time as old snapshots accumulate.
4
+
5
+ **Garbage collection removes old database snapshots from storage while preserving current branch heads.**
6
+
7
+ ## GC vs Purging
8
+
9
+ Don't confuse garbage collection with data purging:
10
+
11
+ - **Garbage Collection** (this document): Removes old database *snapshots* to reclaim storage. Used for routine storage maintenance.
12
+ - **[Data Purging](./time_variance.md#data-purging)**: Permanently deletes specific *data* for privacy compliance (GDPR, HIPAA, CCPA). Used only when legally required.
13
+
14
+ ## How Garbage Collection Works
15
+
16
+ GC whitelists all current branches and marks snapshots as reachable based on a grace period. Snapshots older than the grace period are deleted from storage, but **branch heads are always retained** regardless of age.
17
+
18
+ ## Basic Usage
19
+
20
+ ```clojure
21
+ (require '[datahike.api :as d]
22
+ '[superv.async :refer [<?? S]])
23
+
24
+ ;; Remove only deleted branches, keep all snapshots
25
+ (<?? S (d/gc-storage conn))
26
+ ;; => #{...} ; set of deleted storage blobs
27
+ ```
28
+
29
+ Running without a date removes **only deleted branches**—all snapshots on active branches are preserved. This is safe to run anytime and reclaims storage from old experimental branches.
30
+
31
+ **Note:** Returns a `core.async` channel. Use `<??` to block, or run without it for background execution. GC requires no coordination and won't slow down transactions or reads.
32
+
33
+ ## Grace Periods for Distributed Readers
34
+
35
+ Datahike's [Distributed Index Space](./distributed.md) allows readers to access storage directly without coordination. This is powerful for scalability but means **long-running processes might read from old snapshots for hours**.
36
+
37
+ Examples of long-running readers:
38
+ - **Reporting jobs**: Generate daily/weekly reports by querying yesterday's snapshot
39
+ - **Analytics pipelines**: Process historical data over several hours
40
+ - **Monitoring dashboards**: Display metrics from recent snapshots
41
+ - **Backup processes**: Copy database state while it's being updated
42
+
43
+ **The grace period ensures these readers don't encounter missing data.** Snapshots created after the grace period date are kept; older ones are deleted.
44
+
45
+ ```clojure
46
+ (require '[datahike.api :as d] '[superv.async :refer [<?? S]])
47
+
48
+ ;; Keep last 7 days of snapshots
49
+ (let [seven-days-ago (java.util.Date. (- (System/currentTimeMillis)
50
+ (* 7 24 60 60 1000)))]
51
+ (<?? S (d/gc-storage conn seven-days-ago)))
52
+
53
+ ;; Keep last 30 days (common for compliance)
54
+ (let [thirty-days-ago (java.util.Date. (- (System/currentTimeMillis)
55
+ (* 30 24 60 60 1000)))]
56
+ (<?? S (d/gc-storage conn thirty-days-ago)))
57
+
58
+ ;; Keep last 24 hours (for fast-moving data)
59
+ (let [yesterday (java.util.Date. (- (System/currentTimeMillis)
60
+ (* 24 60 60 1000)))]
61
+ (<?? S (d/gc-storage conn yesterday)))
62
+ ```
63
+
64
+ **Choosing a grace period:**
65
+ - Consider your longest-running reader process
66
+ - Add buffer time for safety (if longest job is 2 hours, use 4-6 hours)
67
+ - Balance storage costs against reader safety
68
+ - Monitor reader patterns before shortening grace periods
69
+
70
+ **Branch heads are always kept** regardless of the grace period—only intermediate snapshots are removed.
71
+
72
+ ## Online Garbage Collection (Incremental GC)
73
+
74
+ > ⚠️ **EXPERIMENTAL FEATURE**
75
+
76
+ Online GC automatically deletes freed index nodes during transaction commits, preventing garbage accumulation during bulk imports and high-write workloads.
77
+
78
+ > Online GC is currently an experimental feature. While it has been tested extensively in Clojure/JVM and includes safety mechanisms for multi-branch databases, use with caution in production. We recommend:
79
+ > - Thorough testing in your specific use case before production deployment
80
+ > - Monitoring freed address counts to verify expected behavior
81
+ > - Using it primarily for bulk imports and high-write workloads where it's most beneficial
82
+ > - **ClojureScript**: Testing with extra care—online GC is available in CLJS but has not yet been tested with large bulk loads; JVM testing is more comprehensive.
83
+ > - Reporting any issues at https://github.com/replikativ/datahike/issues
84
+
85
+ ### How Online GC Works
86
+
87
+ > Online GC is **ONLY safe for single-branch databases**.
88
+ > For multi-branch databases, online GC is automatically disabled because freed nodes
89
+ > from one branch may still be referenced by other branches through structural sharing.
90
+ > Use offline GC (`d/gc-storage`) for multi-branch cleanup instead.
91
+
92
+ When PSS (Persistent Sorted Set) index trees are modified during transactions, old index nodes become unreachable. Online GC tracks these freed addresses with timestamps and deletes them incrementally:
93
+
94
+ 1. **During transaction** (transient mode): PSS calls `markFreed()` for each replaced index node
95
+ 2. **At commit time**: Freed addresses older than the grace period are batch-deleted
96
+ 3. **Multi-branch safety check**: If multiple branches are detected, GC is skipped entirely
97
+ 4. **No full tree walk**: Only freed addresses are deleted, so no expensive tree traversal is required
98
+
99
+ **Key benefits:**
100
+ - **Prevents unbounded storage growth** during bulk imports (single-branch only)
101
+ - **Incremental deletion**: Small batches per commit, low overhead
102
+ - **Grace period support**: Safe for concurrent readers accessing old snapshots
103
+ - **Multi-branch safety**: Automatically disabled on multi-branch databases to prevent corruption
104
+ - **Configurable**: Can be disabled, tuned, or run in background
105
+
106
+ ### Configuration
107
+
108
+ Enable online GC in your database config:
109
+
110
+ ```clojure
111
+ ;; For bulk imports (no concurrent readers, single-branch)
112
+ ;; See "Address Recycling" section below for details
113
+ {:online-gc {:enabled? true
114
+ :grace-period-ms 0 ;; Recycle immediately
115
+ :max-batch 10000} ;; Large batches for efficiency
116
+ :crypto-hash? false} ;; Required for address recycling
117
+
118
+ ;; For production (concurrent readers)
119
+ {:online-gc {:enabled? true
120
+ :grace-period-ms 300000 ;; 5 minutes
121
+ :max-batch 1000}} ;; Smaller batches
122
+
123
+ ;; Disabled (default)
124
+ {:online-gc {:enabled? false}}
125
+ ```
126
+
127
+ **Configuration options:**
128
+
129
+ - `:enabled?` - Enable/disable online GC (default: `false`)
130
+ - `:grace-period-ms` - Minimum age in milliseconds before deletion (default: `60000` = 1 minute)
131
+ - `:max-batch` - Maximum addresses to delete per commit (default: `1000`)
132
+ - `:sync?` - Synchronous deletion (always `false` inside commits for async operation)
133
+
134
+ ### Background GC Mode
135
+
136
+ For production systems, run GC in a background thread instead of blocking commits:
137
+
138
+ ```clojure
139
+ (require '[datahike.online-gc :as online-gc])
140
+
141
+ ;; Start background GC
142
+ (def stop-ch (online-gc/start-background-gc!
143
+ (:store @conn)
144
+ {:grace-period-ms 60000 ;; 1 minute
145
+ :interval-ms 10000 ;; Run every 10 seconds
146
+ :max-batch 1000}))
147
+
148
+ ;; Later, stop background GC
149
+ (clojure.core.async/close! stop-ch)
150
+ ```
151
+
152
+ **Background mode advantages:**
153
+ - Non-blocking: Doesn't slow down commits
154
+ - Periodic cleanup: Runs every N milliseconds
155
+ - Graceful shutdown: Close channel to stop
156
+
157
+ ### Address Recycling (Bulk Import Optimization)
158
+
159
+ > ⚠️ **EXPERIMENTAL FEATURE**
160
+ >
161
+ > Address recycling is an experimental optimization. It has been designed with safety checks (multi-branch detection, grace periods), but should be thoroughly tested in your environment before production use.
162
+
163
+ Online GC includes **address recycling**—freed addresses are reused for new index nodes instead of being deleted from storage. This optimization is particularly powerful for bulk imports.
164
+
165
+ **How it works:**
166
+ 1. When index trees are modified, old root addresses are marked as freed
167
+ 2. Online GC moves eligible addresses to a freelist (grace period applies)
168
+ 3. New index nodes reuse addresses from the freelist instead of generating new UUIDs
169
+ 4. LMDB overwrites the recycled address with new data
170
+
171
+ **Benefits:**
172
+ - **Zero delete operations**: Converts O(freed_nodes) deletes to O(1) freelist append
173
+ - **Reduces LMDB fragmentation**: Addresses are reused rather than accumulating
174
+ - **Perfect for bulk imports**: With `:grace-period-ms 0`, recycling happens immediately
175
+ - **Minimal overhead**: No tree traversal or complex reachability analysis
176
+
177
+ **Safety limitations:**
178
+
179
+ **Address recycling is ONLY safe for:**
180
+ - **Single-branch databases** (shared nodes across branches would be corrupted)
181
+ - **No long-lived readers** (or grace period exceeds reader lifetime)
182
+ - **Bulk import scenarios** (write-only, no concurrent queries)
183
+
184
+ **Online GC or address recycling is automatically disabled when:**
185
+ - Multiple branches exist (online GC completely skipped - use offline GC instead)
186
+ Reason: Freed nodes from one branch may still be referenced by other branches
187
+ through structural sharing
188
+ - `:crypto-hash? true` is set (address recycling is disabled; online GC falls back to deletion mode)
189
+
190
+ ### Bulk Import Configuration
191
+
192
+ For maximum performance during bulk imports where no concurrent readers exist:
193
+
194
+ ```clojure
195
+ ;; Optimal bulk import configuration
196
+ {:online-gc {:enabled? true
197
+ :grace-period-ms 0 ;; Recycle immediately (no readers)
198
+ :max-batch 10000} ;; Large batch (only for delete fallback)
199
+ :crypto-hash? false ;; Required for recycling
200
+ :branch :db} ;; Single branch only
201
+
202
+ ;; Example bulk import
203
+ (let [cfg {:store {:backend :file :path "/data/bulk-import"}
204
+ :online-gc {:enabled? true :grace-period-ms 0}
205
+ :crypto-hash? false}
206
+ conn (d/connect cfg)]
207
+ ;; Import millions of entities
208
+ (doseq [batch entity-batches]
209
+ (d/transact conn batch))
210
+ ;; Storage stays bounded - addresses are recycled
211
+ (d/release conn))
212
+ ```
213
+
214
+ **Bulk import best practices:**
215
+ 1. Set `:grace-period-ms 0` (no concurrent readers to protect)
216
+ 2. Use `:crypto-hash? false` (enables address recycling)
217
+ 3. Stay on a single branch (`:branch :db`)
218
+ 4. Increase `:max-batch` for efficiency (only affects delete fallback)
219
+ 5. Monitor freed address counts to verify recycling is working
220
+
221
+ **Verifying address recycling:**
222
+ - Check logs for `"Online GC: recycling N addresses to freelist"`
223
+ - If you see `"Online GC: skipped (multi-branch detected)"`, ensure single branch
224
+ (multi-branch databases require offline GC instead)
225
+ - Freed address counts should drop to zero after each transaction
226
+
227
+ ### Online GC vs Offline GC
228
+
229
+ **Online GC** (incremental):
230
+ - Runs during commits
231
+ - Deletes only **freed index nodes** from recent transactions
232
+ - Fast: No tree traversal required
233
+ - **With recycling**: No delete operations at all, just freelist management
234
+ - **ONLY for single-branch databases** - automatically disabled for multi-branch
235
+ - Best for: Bulk imports, high-write workloads
236
+
237
+ **Offline GC** (`d/gc-storage`):
238
+ - Runs manually
239
+ - Deletes **entire old snapshots** by walking all branches
240
+ - Slower: Full tree traversal and marking
241
+ - Handles **multi-branch databases** safely through reachability analysis
242
+ - **Required for multi-branch databases** (online GC is automatically disabled for them)
243
+ - Best for: Periodic maintenance, deleting old branches, multi-branch cleanup
244
+
245
+ **Use both:** Online GC for incremental cleanup during single-branch writes, offline GC for periodic deep cleaning and all multi-branch scenarios.
246
+
247
+ ## Automatic Garbage Collection
248
+
249
+ With online GC enabled, garbage collection becomes largely automatic during normal operation. Manual `d/gc-storage` runs are only needed for:
250
+ - Deleting old branches and cleaning up multi-branch databases (online GC is disabled for them)
251
+ - Periodic deep cleaning (monthly/quarterly)
252
+ - Compliance-driven snapshot removal
253
+
254
+ ## When to Run GC
255
+
256
+ - **After deleting branches**: Immediately reclaim storage
257
+ - **Periodic maintenance**: Weekly/monthly based on storage growth
258
+ - **Storage alerts**: When approaching capacity limits
259
+ - **Version cleanup**: After completing long-running migrations
260
+
261
+ ## What Gets Deleted
262
+
263
+ GC removes:
264
+ - Database snapshots older than the grace period date
265
+ - Deleted branches and their snapshots
266
+ - Unreachable index nodes from old snapshots
267
+
268
+ GC preserves:
269
+ - All current branch heads (always)
270
+ - Snapshots created after the grace period date
271
+ - All data on retained snapshots (GC doesn't delete data, only snapshots)
272
+
273
+ **Remember:** For deleting specific data (GDPR compliance), use [data purging](./time_variance.md#data-purging), not garbage collection.