@lix-js/sdk 0.6.0-preview.1 → 0.6.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/SKILL.md +304 -320
  2. package/dist/engine-wasm/wasm/lix_engine.d.ts +5 -0
  3. package/dist/engine-wasm/wasm/lix_engine.js +9 -13
  4. package/dist/engine-wasm/wasm/lix_engine.wasm +0 -0
  5. package/dist/engine-wasm/wasm/lix_engine.wasm.d.ts +1 -0
  6. package/dist/generated/builtin-schemas.d.ts +87 -162
  7. package/dist/generated/builtin-schemas.js +139 -236
  8. package/dist/open-lix.d.ts +103 -14
  9. package/dist/open-lix.js +3 -0
  10. package/dist/sqlite/index.js +99 -22
  11. package/dist-engine-src/README.md +18 -0
  12. package/dist-engine-src/src/backend/kv.rs +358 -0
  13. package/dist-engine-src/src/backend/mod.rs +12 -0
  14. package/dist-engine-src/src/backend/testing.rs +658 -0
  15. package/dist-engine-src/src/backend/types.rs +96 -0
  16. package/dist-engine-src/src/binary_cas/chunking.rs +31 -0
  17. package/dist-engine-src/src/binary_cas/codec.rs +346 -0
  18. package/dist-engine-src/src/binary_cas/context.rs +139 -0
  19. package/dist-engine-src/src/binary_cas/kv.rs +1063 -0
  20. package/dist-engine-src/src/binary_cas/mod.rs +11 -0
  21. package/dist-engine-src/src/binary_cas/types.rs +121 -0
  22. package/dist-engine-src/src/catalog/context.rs +412 -0
  23. package/dist-engine-src/src/catalog/mod.rs +10 -0
  24. package/dist-engine-src/src/catalog/schema.rs +4 -0
  25. package/dist-engine-src/src/catalog/snapshot.rs +1114 -0
  26. package/dist-engine-src/src/cel/context.rs +86 -0
  27. package/dist-engine-src/src/cel/error.rs +19 -0
  28. package/dist-engine-src/src/cel/mod.rs +8 -0
  29. package/dist-engine-src/src/cel/provider.rs +9 -0
  30. package/dist-engine-src/src/cel/runtime.rs +167 -0
  31. package/dist-engine-src/src/cel/value.rs +50 -0
  32. package/dist-engine-src/src/commit_graph/context.rs +901 -0
  33. package/dist-engine-src/src/commit_graph/mod.rs +11 -0
  34. package/dist-engine-src/src/commit_graph/types.rs +109 -0
  35. package/dist-engine-src/src/commit_graph/walker.rs +756 -0
  36. package/dist-engine-src/src/commit_store/codec.rs +887 -0
  37. package/dist-engine-src/src/commit_store/context.rs +944 -0
  38. package/dist-engine-src/src/commit_store/materialization.rs +84 -0
  39. package/dist-engine-src/src/commit_store/mod.rs +16 -0
  40. package/dist-engine-src/src/commit_store/storage.rs +600 -0
  41. package/dist-engine-src/src/commit_store/types.rs +215 -0
  42. package/dist-engine-src/src/common/error.rs +313 -0
  43. package/dist-engine-src/src/common/fingerprint.rs +3 -0
  44. package/dist-engine-src/src/common/fs_path.rs +1336 -0
  45. package/dist-engine-src/src/common/identity.rs +145 -0
  46. package/dist-engine-src/src/common/json_pointer.rs +67 -0
  47. package/dist-engine-src/src/common/metadata.rs +40 -0
  48. package/dist-engine-src/src/common/mod.rs +23 -0
  49. package/dist-engine-src/src/common/types.rs +105 -0
  50. package/dist-engine-src/src/common/wire.rs +222 -0
  51. package/dist-engine-src/src/domain.rs +324 -0
  52. package/dist-engine-src/src/engine.rs +225 -0
  53. package/dist-engine-src/src/entity_identity.rs +405 -0
  54. package/dist-engine-src/src/functions/context.rs +292 -0
  55. package/dist-engine-src/src/functions/deterministic.rs +113 -0
  56. package/dist-engine-src/src/functions/mod.rs +18 -0
  57. package/dist-engine-src/src/functions/provider.rs +130 -0
  58. package/dist-engine-src/src/functions/state.rs +336 -0
  59. package/dist-engine-src/src/functions/types.rs +37 -0
  60. package/dist-engine-src/src/init.rs +558 -0
  61. package/dist-engine-src/src/json_store/compression.rs +77 -0
  62. package/dist-engine-src/src/json_store/context.rs +423 -0
  63. package/dist-engine-src/src/json_store/encoded.rs +15 -0
  64. package/dist-engine-src/src/json_store/mod.rs +12 -0
  65. package/dist-engine-src/src/json_store/store.rs +1109 -0
  66. package/dist-engine-src/src/json_store/types.rs +217 -0
  67. package/dist-engine-src/src/lib.rs +62 -0
  68. package/dist-engine-src/src/live_state/context.rs +2019 -0
  69. package/dist-engine-src/src/live_state/mod.rs +15 -0
  70. package/dist-engine-src/src/live_state/overlay.rs +75 -0
  71. package/dist-engine-src/src/live_state/reader.rs +23 -0
  72. package/dist-engine-src/src/live_state/types.rs +222 -0
  73. package/dist-engine-src/src/live_state/visibility.rs +223 -0
  74. package/dist-engine-src/src/plugin/archive.rs +438 -0
  75. package/dist-engine-src/src/plugin/component.rs +183 -0
  76. package/dist-engine-src/src/plugin/install.rs +619 -0
  77. package/dist-engine-src/src/plugin/manifest.rs +516 -0
  78. package/dist-engine-src/src/plugin/materializer.rs +477 -0
  79. package/dist-engine-src/src/plugin/mod.rs +33 -0
  80. package/dist-engine-src/src/plugin/plugin_manifest.json +118 -0
  81. package/dist-engine-src/src/plugin/storage.rs +74 -0
  82. package/dist-engine-src/src/schema/annotations/defaults.rs +275 -0
  83. package/dist-engine-src/src/schema/annotations/mod.rs +1 -0
  84. package/dist-engine-src/src/schema/builtin/lix_account.json +21 -0
  85. package/dist-engine-src/src/schema/builtin/lix_active_account.json +29 -0
  86. package/dist-engine-src/src/schema/builtin/lix_binary_blob_ref.json +29 -0
  87. package/dist-engine-src/src/schema/builtin/lix_change.json +63 -0
  88. package/dist-engine-src/src/schema/builtin/lix_change_author.json +45 -0
  89. package/dist-engine-src/src/schema/builtin/lix_commit.json +24 -0
  90. package/dist-engine-src/src/schema/builtin/lix_commit_edge.json +53 -0
  91. package/dist-engine-src/src/schema/builtin/lix_directory_descriptor.json +52 -0
  92. package/dist-engine-src/src/schema/builtin/lix_file_descriptor.json +52 -0
  93. package/dist-engine-src/src/schema/builtin/lix_key_value.json +40 -0
  94. package/dist-engine-src/src/schema/builtin/lix_label.json +29 -0
  95. package/dist-engine-src/src/schema/builtin/lix_label_assignment.json +74 -0
  96. package/dist-engine-src/src/schema/builtin/lix_registered_schema.json +25 -0
  97. package/dist-engine-src/src/schema/builtin/lix_version_descriptor.json +34 -0
  98. package/dist-engine-src/src/schema/builtin/lix_version_ref.json +48 -0
  99. package/dist-engine-src/src/schema/builtin/mod.rs +222 -0
  100. package/dist-engine-src/src/schema/compatibility.rs +787 -0
  101. package/dist-engine-src/src/schema/definition.json +187 -0
  102. package/dist-engine-src/src/schema/definition.rs +742 -0
  103. package/dist-engine-src/src/schema/key.rs +138 -0
  104. package/dist-engine-src/src/schema/mod.rs +20 -0
  105. package/dist-engine-src/src/schema/seed.rs +14 -0
  106. package/dist-engine-src/src/schema/tests.rs +780 -0
  107. package/dist-engine-src/src/session/context.rs +364 -0
  108. package/dist-engine-src/src/session/create_version.rs +88 -0
  109. package/dist-engine-src/src/session/execute.rs +478 -0
  110. package/dist-engine-src/src/session/merge/analysis.rs +102 -0
  111. package/dist-engine-src/src/session/merge/apply.rs +23 -0
  112. package/dist-engine-src/src/session/merge/conflicts.rs +63 -0
  113. package/dist-engine-src/src/session/merge/mod.rs +11 -0
  114. package/dist-engine-src/src/session/merge/stats.rs +65 -0
  115. package/dist-engine-src/src/session/merge/version.rs +427 -0
  116. package/dist-engine-src/src/session/mod.rs +27 -0
  117. package/dist-engine-src/src/session/optimization9_sql2_bench.rs +100 -0
  118. package/dist-engine-src/src/session/switch_version.rs +109 -0
  119. package/dist-engine-src/src/sql2/change_provider.rs +331 -0
  120. package/dist-engine-src/src/sql2/classify.rs +182 -0
  121. package/dist-engine-src/src/sql2/context.rs +311 -0
  122. package/dist-engine-src/src/sql2/directory_history_provider.rs +631 -0
  123. package/dist-engine-src/src/sql2/directory_provider.rs +2453 -0
  124. package/dist-engine-src/src/sql2/dml.rs +148 -0
  125. package/dist-engine-src/src/sql2/entity_history_provider.rs +440 -0
  126. package/dist-engine-src/src/sql2/entity_provider.rs +3211 -0
  127. package/dist-engine-src/src/sql2/error.rs +216 -0
  128. package/dist-engine-src/src/sql2/execute.rs +3440 -0
  129. package/dist-engine-src/src/sql2/file_history_provider.rs +910 -0
  130. package/dist-engine-src/src/sql2/file_provider.rs +3679 -0
  131. package/dist-engine-src/src/sql2/filesystem_planner.rs +1490 -0
  132. package/dist-engine-src/src/sql2/filesystem_predicates.rs +159 -0
  133. package/dist-engine-src/src/sql2/filesystem_visibility.rs +383 -0
  134. package/dist-engine-src/src/sql2/history_projection.rs +56 -0
  135. package/dist-engine-src/src/sql2/history_provider.rs +412 -0
  136. package/dist-engine-src/src/sql2/history_route.rs +657 -0
  137. package/dist-engine-src/src/sql2/lix_state_provider.rs +2512 -0
  138. package/dist-engine-src/src/sql2/mod.rs +46 -0
  139. package/dist-engine-src/src/sql2/predicate_typecheck.rs +246 -0
  140. package/dist-engine-src/src/sql2/public_bind/assignment.rs +46 -0
  141. package/dist-engine-src/src/sql2/public_bind/capability.rs +41 -0
  142. package/dist-engine-src/src/sql2/public_bind/dml.rs +166 -0
  143. package/dist-engine-src/src/sql2/public_bind/mod.rs +25 -0
  144. package/dist-engine-src/src/sql2/public_bind/table.rs +168 -0
  145. package/dist-engine-src/src/sql2/read_only.rs +63 -0
  146. package/dist-engine-src/src/sql2/record_batch.rs +17 -0
  147. package/dist-engine-src/src/sql2/result_metadata.rs +29 -0
  148. package/dist-engine-src/src/sql2/runtime.rs +60 -0
  149. package/dist-engine-src/src/sql2/session.rs +132 -0
  150. package/dist-engine-src/src/sql2/udfs/common.rs +295 -0
  151. package/dist-engine-src/src/sql2/udfs/lix_active_version_commit_id.rs +53 -0
  152. package/dist-engine-src/src/sql2/udfs/lix_empty_blob.rs +47 -0
  153. package/dist-engine-src/src/sql2/udfs/lix_json.rs +100 -0
  154. package/dist-engine-src/src/sql2/udfs/lix_json_get.rs +99 -0
  155. package/dist-engine-src/src/sql2/udfs/lix_json_get_text.rs +99 -0
  156. package/dist-engine-src/src/sql2/udfs/lix_text_decode.rs +82 -0
  157. package/dist-engine-src/src/sql2/udfs/lix_text_encode.rs +85 -0
  158. package/dist-engine-src/src/sql2/udfs/lix_timestamp.rs +76 -0
  159. package/dist-engine-src/src/sql2/udfs/lix_uuid_v7.rs +76 -0
  160. package/dist-engine-src/src/sql2/udfs/mod.rs +89 -0
  161. package/dist-engine-src/src/sql2/udfs/public_call.rs +211 -0
  162. package/dist-engine-src/src/sql2/version_provider.rs +1202 -0
  163. package/dist-engine-src/src/sql2/version_scope.rs +394 -0
  164. package/dist-engine-src/src/sql2/write_normalization.rs +345 -0
  165. package/dist-engine-src/src/storage/context.rs +356 -0
  166. package/dist-engine-src/src/storage/mod.rs +14 -0
  167. package/dist-engine-src/src/storage/read_scope.rs +88 -0
  168. package/dist-engine-src/src/storage/types.rs +501 -0
  169. package/dist-engine-src/src/storage_bench.rs +4863 -0
  170. package/dist-engine-src/src/test_support.rs +228 -0
  171. package/dist-engine-src/src/tracked_state/by_file_index.rs +98 -0
  172. package/dist-engine-src/src/tracked_state/codec.rs +2085 -0
  173. package/dist-engine-src/src/tracked_state/context.rs +1867 -0
  174. package/dist-engine-src/src/tracked_state/diff.rs +686 -0
  175. package/dist-engine-src/src/tracked_state/materialization.rs +403 -0
  176. package/dist-engine-src/src/tracked_state/materializer.rs +488 -0
  177. package/dist-engine-src/src/tracked_state/merge.rs +492 -0
  178. package/dist-engine-src/src/tracked_state/mod.rs +32 -0
  179. package/dist-engine-src/src/tracked_state/storage.rs +375 -0
  180. package/dist-engine-src/src/tracked_state/tree.rs +3187 -0
  181. package/dist-engine-src/src/tracked_state/types.rs +231 -0
  182. package/dist-engine-src/src/transaction/commit.rs +1484 -0
  183. package/dist-engine-src/src/transaction/context.rs +1548 -0
  184. package/dist-engine-src/src/transaction/live_state_overlay.rs +35 -0
  185. package/dist-engine-src/src/transaction/mod.rs +13 -0
  186. package/dist-engine-src/src/transaction/normalization.rs +890 -0
  187. package/dist-engine-src/src/transaction/prep.rs +37 -0
  188. package/dist-engine-src/src/transaction/schema_resolver.rs +149 -0
  189. package/dist-engine-src/src/transaction/staging.rs +1731 -0
  190. package/dist-engine-src/src/transaction/types.rs +460 -0
  191. package/dist-engine-src/src/transaction/validation.rs +5830 -0
  192. package/dist-engine-src/src/untracked_state/codec.rs +307 -0
  193. package/dist-engine-src/src/untracked_state/context.rs +98 -0
  194. package/dist-engine-src/src/untracked_state/materialization.rs +63 -0
  195. package/dist-engine-src/src/untracked_state/mod.rs +15 -0
  196. package/dist-engine-src/src/untracked_state/storage.rs +396 -0
  197. package/dist-engine-src/src/untracked_state/types.rs +146 -0
  198. package/dist-engine-src/src/version/context.rs +40 -0
  199. package/dist-engine-src/src/version/lifecycle.rs +221 -0
  200. package/dist-engine-src/src/version/mod.rs +13 -0
  201. package/dist-engine-src/src/version/refs.rs +330 -0
  202. package/dist-engine-src/src/version/stage_rows.rs +67 -0
  203. package/dist-engine-src/src/version/types.rs +21 -0
  204. package/dist-engine-src/src/wasm/mod.rs +60 -0
  205. package/package.json +68 -64
@@ -0,0 +1,1336 @@
1
+ //! Canonical Lix filesystem paths live in this module.
2
+ //!
3
+ //! Contract:
4
+ //!
5
+ //! - Canonical internal form is an absolute slash-separated Lix filesystem
6
+ //! path, structurally aligned with RFC 3986 `path-absolute` / RFC 8089 file
7
+ //! URI paths.
8
+ //! - RFC 3986/8089 URI spelling is a boundary serialization, not the internal
9
+ //! identity form.
10
+ //! - Each non-empty segment is enforced with an RFC 8264 PRECIS
11
+ //! `IdentifierClass` profile, case-preserved and NFC-normalized.
12
+ //! - Percent encoding is accepted only as boundary input. Canonical internal
13
+ //! paths store decoded Unicode segments, never percent triplets.
14
+ //! - Dot segments are rejected rather than rewritten because Lix paths are
15
+ //! stable logical identities, not URI references being resolved against a
16
+ //! base path.
17
+ //!
18
+ //! Canonicalization order:
19
+ //!
20
+ //! 1. Validate and decode RFC 3986 percent triplets in each segment.
21
+ //! 2. Normalize decoded segment text to NFC.
22
+ //! 3. Apply PRECIS IdentifierClass enforcement.
23
+ //! 4. Reject Lix structural sentinels and separators.
24
+ //!
25
+ //! Fixed standard-derived rules:
26
+ //!
27
+ //! - Path shape follows the absolute-path grammar used by RFC 3986/RFC 8089.
28
+ //! - Segment text follows RFC 8264 PRECIS IdentifierClass semantics.
29
+ //! - Comparison is exact-string and case-sensitive after canonicalization.
30
+ //!
31
+ //! Lix profile rules:
32
+ //!
33
+ //! - File paths never end with `/`.
34
+ //! - Directory paths always end with `/`.
35
+ //! - `NUL` is rejected in all segments.
36
+ //! - `/`, `\`, empty segments, `.`, and `..` are rejected in all non-root
37
+ //! segments.
38
+ //! - `%`, `?`, and `#` are reserved for URI boundary syntax and are rejected
39
+ //! in canonical internal segments.
40
+ //! - Segments cannot begin with a combining mark.
41
+ //! - Root is represented as the normalized directory path `/`.
42
+ //! - Git/CLI import and ASCII-only URI serialization are boundary adapters,
43
+ //! not part of the core `fs_path` contract.
44
+ //!
45
+ //! Length policy:
46
+ //!
47
+ //! - Each canonical segment is capped at 255 bytes, matching common
48
+ //! filesystem component limits.
49
+ //! - Each full canonical path is capped at 4096 bytes.
50
+ //! - Raw boundary input is separately capped before normalization so oversized
51
+ //! URI spellings cannot reach Unicode processing.
52
+ //!
53
+ //! Runtime strategy:
54
+ //!
55
+ //! - This module keeps Lix structural checks local and delegates Unicode
56
+ //! segment validity to the PRECIS implementation.
57
+ //! - `iref` is an RFC 3987 / RFC 3986 shape oracle in tests, not the runtime
58
+ //! segment authority.
59
+ //!
60
+ //! Glossary:
61
+ //!
62
+ //! - Raw input path: caller-provided path before normalization.
63
+ //! - Normalized path: path after NFC normalization.
64
+ //! - Canonical path: stored path after full normalization/canonicalization.
65
+ //! - File path: canonical path naming a file, without a trailing slash.
66
+ //! - Directory path: canonical path naming a directory, with a trailing slash.
67
+ //! - Internal path form: the canonical Unicode-bearing representation used by
68
+ //! the engine.
69
+ //! - Boundary URI form: an ASCII-only serialization used when interoperating
70
+ //! with URI-only systems.
71
+
72
+ use precis_profiles::precis_core::profile::Profile;
73
+ use precis_profiles::UsernameCasePreserved;
74
+ use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
75
+
76
+ use crate::LixError;
77
+ use std::fmt;
78
+ use std::ops::Deref;
79
+
80
/// Largest accepted byte length of a complete canonical path.
const MAX_CANONICAL_PATH_BYTES: usize = 4_096;
/// Largest accepted byte length of a single canonical path segment.
const MAX_CANONICAL_PATH_SEGMENT_BYTES: usize = 255;
/// Largest accepted byte length of raw caller input (16 KiB); this limit is
/// checked before any Unicode normalization runs.
const MAX_RAW_PATH_INPUT_BYTES: usize = 16 << 10;
83
+
84
/// Newtype wrapper marking a string as a directory path in canonical form.
///
/// The wrapper itself performs no validation; see the constructors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct NormalizedDirectoryPath(String);

impl NormalizedDirectoryPath {
    /// Canonicalizes `path` with `normalize_directory_path` and wraps the
    /// result. Only compiled for tests.
    #[cfg(test)]
    pub(crate) fn try_from_path(path: &str) -> Result<Self, LixError> {
        let canonical = normalize_directory_path(path)?;
        Ok(Self(canonical))
    }

    /// Wraps `path` as-is. No checks are performed, so the caller vouches
    /// that the string is already canonical.
    pub(crate) fn from_normalized(path: String) -> Self {
        NormalizedDirectoryPath(path)
    }

    /// Borrows the wrapped path text.
    pub(crate) fn as_str(&self) -> &str {
        &self.0
    }
}

impl Deref for NormalizedDirectoryPath {
    type Target = str;

    /// Exposes the wrapped path as a plain `str`.
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl fmt::Display for NormalizedDirectoryPath {
    /// Writes the path text verbatim (formatter width/padding are not applied).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
114
+
115
/// Newtype wrapper marking a string as a file path in canonical form.
///
/// The wrapper itself performs no validation; see `from_normalized`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct NormalizedFilePath(String);

impl NormalizedFilePath {
    /// Wraps `path` as-is. No checks are performed, so the caller vouches
    /// that the string is already canonical.
    pub(crate) fn from_normalized(path: String) -> Self {
        NormalizedFilePath(path)
    }

    /// Borrows the wrapped path text.
    pub(crate) fn as_str(&self) -> &str {
        &self.0
    }
}

impl Deref for NormalizedFilePath {
    type Target = str;

    /// Exposes the wrapped path as a plain `str`.
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl fmt::Display for NormalizedFilePath {
    /// Writes the path text verbatim (formatter width/padding are not applied).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}
141
+
142
+ #[derive(Debug, Clone, PartialEq, Eq)]
143
+ pub(crate) struct ParsedFilePath {
144
+ pub(crate) normalized_path: NormalizedFilePath,
145
+ pub(crate) directory_path: Option<NormalizedDirectoryPath>,
146
+ pub(crate) name: String,
147
+ }
148
+
149
+ impl ParsedFilePath {
150
+ pub(crate) fn try_from_path(path: &str) -> Result<Self, LixError> {
151
+ parse_file_path(path)
152
+ }
153
+ }
154
+
155
+ type PathResult<T> = Result<T, PathError>;
156
+
157
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
158
+ enum PathError {
159
+ MissingLeadingSlash,
160
+ UnexpectedTrailingSlashOnFilePath,
161
+ MissingTrailingSlashOnDirectoryPath,
162
+ EmptySegment,
163
+ DotSegment,
164
+ SlashInSegment,
165
+ Backslash,
166
+ InvalidPercentEncoding,
167
+ InvalidPathSegmentCodePoint,
168
+ PathTooLong,
169
+ RawPathInputTooLong,
170
+ SegmentTooLong,
171
+ NulByte,
172
+ InvalidRootUsage,
173
+ #[cfg(test)]
174
+ InvalidDirectoryParentPath,
175
+ }
176
+
177
+ impl PathError {
178
+ fn into_lix_error(self) -> LixError {
179
+ let (code, message, hint) = match self {
180
+ Self::MissingLeadingSlash => (
181
+ "LIX_ERROR_PATH_MISSING_LEADING_SLASH",
182
+ "path must start with '/'",
183
+ Some("prefix the path with '/'"),
184
+ ),
185
+ Self::UnexpectedTrailingSlashOnFilePath => (
186
+ "LIX_ERROR_PATH_UNEXPECTED_TRAILING_SLASH_ON_FILE",
187
+ "file path must not end with '/'",
188
+ Some("remove the trailing slash or use a directory path instead"),
189
+ ),
190
+ Self::MissingTrailingSlashOnDirectoryPath => (
191
+ "LIX_ERROR_PATH_MISSING_TRAILING_SLASH_ON_DIRECTORY",
192
+ "directory path must end with '/'",
193
+ Some("append a trailing slash or use a file path instead"),
194
+ ),
195
+ Self::EmptySegment => (
196
+ "LIX_ERROR_PATH_EMPTY_SEGMENT",
197
+ "path must not contain empty segments",
198
+ Some("remove duplicate slashes like '//'"),
199
+ ),
200
+ Self::DotSegment => (
201
+ "LIX_ERROR_PATH_DOT_SEGMENT",
202
+ "path segment cannot be '.' or '..'",
203
+ Some("use a real segment name instead of '.' or '..'"),
204
+ ),
205
+ Self::SlashInSegment => (
206
+ "LIX_ERROR_PATH_SLASH_IN_SEGMENT",
207
+ "path segment must not contain '/'",
208
+ Some("pass a single segment name, not a full path"),
209
+ ),
210
+ Self::Backslash => (
211
+ "LIX_ERROR_PATH_BACKSLASH",
212
+ "path must not contain '\\'",
213
+ Some("use '/' separators instead of '\\'"),
214
+ ),
215
+ Self::InvalidPercentEncoding => (
216
+ "LIX_ERROR_PATH_INVALID_PERCENT_ENCODING",
217
+ "path contains invalid percent encoding",
218
+ Some("use valid percent triplets only for URI boundary input; '%' is not allowed in canonical path segments"),
219
+ ),
220
+ Self::InvalidPathSegmentCodePoint => (
221
+ "LIX_ERROR_PATH_INVALID_SEGMENT_CODE_POINT",
222
+ "path segment contains a character that is not allowed in canonical Lix paths",
223
+ Some("canonical paths use RFC 8264 PRECIS IdentifierClass segments; use URI percent encoding only at boundaries"),
224
+ ),
225
+ Self::PathTooLong => (
226
+ "LIX_ERROR_PATH_TOO_LONG",
227
+ "path is too long",
228
+ Some("keep canonical paths at or below 4096 bytes"),
229
+ ),
230
+ Self::RawPathInputTooLong => (
231
+ "LIX_ERROR_PATH_INPUT_TOO_LONG",
232
+ "path input is too long",
233
+ Some("keep raw path input at or below 16384 bytes"),
234
+ ),
235
+ Self::SegmentTooLong => (
236
+ "LIX_ERROR_PATH_SEGMENT_TOO_LONG",
237
+ "path segment is too long",
238
+ Some("keep each canonical path segment at or below 255 bytes"),
239
+ ),
240
+ Self::NulByte => (
241
+ "LIX_ERROR_PATH_NUL_BYTE",
242
+ "path must not contain a NUL byte",
243
+ Some("remove the NUL byte from the path"),
244
+ ),
245
+ Self::InvalidRootUsage => (
246
+ "LIX_ERROR_PATH_INVALID_ROOT_USAGE",
247
+ "root '/' is only valid as a directory path",
248
+ Some("use '/' as a directory path, never as a file path"),
249
+ ),
250
+ #[cfg(test)]
251
+ Self::InvalidDirectoryParentPath => (
252
+ "LIX_ERROR_PATH_INVALID_DIRECTORY_PARENT",
253
+ "directory parent path must be a normalized directory path",
254
+ Some("pass '/' or a path ending with '/' as the parent directory"),
255
+ ),
256
+ };
257
+
258
+ let err = LixError::new(code, message);
259
+ match hint {
260
+ Some(hint) => err.with_hint(hint),
261
+ None => err,
262
+ }
263
+ }
264
+ }
265
+
266
+ pub(crate) fn normalize_path_segment(raw: &str) -> Result<String, LixError> {
267
+ normalize_path_segment_impl(raw).map_err(PathError::into_lix_error)
268
+ }
269
+
270
+ fn normalize_path_segment_impl(raw: &str) -> PathResult<String> {
271
+ ensure_raw_path_input_len(raw)?;
272
+ let normalized = raw.nfc().collect::<String>();
273
+ let canonical = normalize_validated_path_segment(&normalized)?;
274
+ if canonical == "." || canonical == ".." {
275
+ return Err(PathError::DotSegment);
276
+ }
277
+ Ok(canonical)
278
+ }
279
+
280
+ fn validate_path_segment_chars(normalized: &str) -> PathResult<String> {
281
+ if normalized.is_empty() {
282
+ return Err(PathError::EmptySegment);
283
+ }
284
+ if normalized.contains('\0') {
285
+ return Err(PathError::NulByte);
286
+ }
287
+ if normalized.contains('/') {
288
+ return Err(PathError::SlashInSegment);
289
+ }
290
+ if normalized.contains('\\') {
291
+ return Err(PathError::Backslash);
292
+ }
293
+ if !segment_has_valid_percent_encoding(&normalized) {
294
+ return Err(PathError::InvalidPercentEncoding);
295
+ }
296
+ let decoded = decode_percent_encoded_segment(normalized)?;
297
+ validate_decoded_path_segment_structure(&decoded)?;
298
+ Ok(decoded)
299
+ }
300
+
301
+ fn normalize_validated_path_segment(normalized: &str) -> PathResult<String> {
302
+ let decoded = validate_path_segment_chars(normalized)?;
303
+ ensure_canonical_segment_len(&decoded)?;
304
+ let canonical = enforce_precis_segment(&decoded)?;
305
+ ensure_canonical_segment_len(&canonical)?;
306
+ Ok(canonical)
307
+ }
308
+
309
+ fn decode_percent_encoded_segment(segment: &str) -> PathResult<String> {
310
+ let bytes = segment.as_bytes();
311
+ let mut decoded = Vec::with_capacity(segment.len());
312
+ let mut index = 0usize;
313
+
314
+ while index < bytes.len() {
315
+ if bytes[index] == b'%' {
316
+ decoded.push((hex_value(bytes[index + 1]) << 4) | hex_value(bytes[index + 2]));
317
+ index += 3;
318
+ continue;
319
+ }
320
+
321
+ let ch = segment[index..]
322
+ .chars()
323
+ .next()
324
+ .expect("slice at char boundary should yield a char");
325
+ let mut utf8 = [0u8; 4];
326
+ decoded.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
327
+ index += ch.len_utf8();
328
+ }
329
+
330
+ String::from_utf8(decoded).map_err(|_| PathError::InvalidPathSegmentCodePoint)
331
+ }
332
+
333
/// Returns the numeric value (0–15) of an ASCII hexadecimal digit.
///
/// # Panics
///
/// Panics if `byte` is not an ASCII hex digit; callers are expected to have
/// validated the percent triplet beforehand.
fn hex_value(byte: u8) -> u8 {
    if byte.is_ascii_digit() {
        byte - b'0'
    } else if (b'a'..=b'f').contains(&byte) {
        byte - b'a' + 10
    } else if (b'A'..=b'F').contains(&byte) {
        byte - b'A' + 10
    } else {
        unreachable!("hex_value only called after percent validation")
    }
}
341
+
342
/// Reports whether every `%` in `segment` starts a well-formed percent
/// triplet, i.e. is followed by exactly two ASCII hexadecimal digits.
fn segment_has_valid_percent_encoding(segment: &str) -> bool {
    let mut remaining = segment.bytes();
    while let Some(byte) = remaining.next() {
        if byte != b'%' {
            continue;
        }
        // A triplet needs two more bytes, and both must be hex digits.
        let (Some(hi), Some(lo)) = (remaining.next(), remaining.next()) else {
            return false;
        };
        if !(hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()) {
            return false;
        }
    }
    true
}
362
+
363
+ fn validate_decoded_path_segment_structure(segment: &str) -> PathResult<()> {
364
+ if segment.contains('\0') {
365
+ return Err(PathError::NulByte);
366
+ }
367
+ if segment.contains('/') {
368
+ return Err(PathError::SlashInSegment);
369
+ }
370
+ if segment.contains('\\') {
371
+ return Err(PathError::Backslash);
372
+ }
373
+ if segment.contains('%') || segment.contains('?') || segment.contains('#') {
374
+ return Err(PathError::InvalidPathSegmentCodePoint);
375
+ }
376
+ if segment.chars().next().is_some_and(is_combining_mark) {
377
+ return Err(PathError::InvalidPathSegmentCodePoint);
378
+ }
379
+ Ok(())
380
+ }
381
+
382
+ fn enforce_precis_segment(segment: &str) -> PathResult<String> {
383
+ UsernameCasePreserved::new()
384
+ .enforce(segment)
385
+ .map(|segment| segment.into_owned())
386
+ .map_err(|_| PathError::InvalidPathSegmentCodePoint)
387
+ }
388
+
389
+ fn normalize_file_path_impl(path: &str) -> PathResult<String> {
390
+ ensure_raw_path_input_len(path)?;
391
+ let normalized = path.nfc().collect::<String>();
392
+ if !normalized.starts_with('/') {
393
+ return Err(PathError::MissingLeadingSlash);
394
+ }
395
+ if normalized == "/" {
396
+ return Err(PathError::InvalidRootUsage);
397
+ }
398
+ if normalized.ends_with('/') {
399
+ return Err(PathError::UnexpectedTrailingSlashOnFilePath);
400
+ }
401
+ if normalized.contains('\\') {
402
+ return Err(PathError::Backslash);
403
+ }
404
+ if normalized.contains("//") {
405
+ return Err(PathError::EmptySegment);
406
+ }
407
+ let segments = normalized
408
+ .split('/')
409
+ .filter(|segment| !segment.is_empty())
410
+ .collect::<Vec<_>>();
411
+ if segments.is_empty() {
412
+ return Err(PathError::EmptySegment);
413
+ }
414
+ let canonical_segments = canonicalize_path_segments(&segments)?;
415
+ if canonical_segments.is_empty() {
416
+ return Err(PathError::InvalidRootUsage);
417
+ }
418
+ let canonical = format!("/{}", canonical_segments.join("/"));
419
+ ensure_canonical_path_len(&canonical)?;
420
+ Ok(canonical)
421
+ }
422
+
423
+ pub(crate) fn normalize_directory_path(path: &str) -> Result<String, LixError> {
424
+ normalize_directory_path_impl(path).map_err(PathError::into_lix_error)
425
+ }
426
+
427
+ fn normalize_directory_path_impl(path: &str) -> PathResult<String> {
428
+ ensure_raw_path_input_len(path)?;
429
+ let normalized = path.nfc().collect::<String>();
430
+ if !normalized.starts_with('/') {
431
+ return Err(PathError::MissingLeadingSlash);
432
+ }
433
+ if normalized.contains('\\') {
434
+ return Err(PathError::Backslash);
435
+ }
436
+ if normalized.contains("//") {
437
+ return Err(PathError::EmptySegment);
438
+ }
439
+ if normalized == "/" {
440
+ return Ok("/".to_string());
441
+ }
442
+ if !normalized.ends_with('/') {
443
+ return Err(PathError::MissingTrailingSlashOnDirectoryPath);
444
+ }
445
+ let segments = normalized
446
+ .split('/')
447
+ .filter(|segment| !segment.is_empty())
448
+ .collect::<Vec<_>>();
449
+ let normalized_segments = canonicalize_path_segments(&segments)?;
450
+ if normalized_segments.is_empty() {
451
+ return Ok("/".to_string());
452
+ }
453
+ let canonical = format!("/{}/", normalized_segments.join("/"));
454
+ ensure_canonical_path_len(&canonical)?;
455
+ Ok(canonical)
456
+ }
457
+
458
+ fn canonicalize_path_segments(segments: &[&str]) -> PathResult<Vec<String>> {
459
+ let mut canonical_segments = Vec::with_capacity(segments.len());
460
+
461
+ for segment in segments {
462
+ let normalized_segment = normalize_validated_path_segment(segment)?;
463
+ match normalized_segment.as_str() {
464
+ "." | ".." => return Err(PathError::DotSegment),
465
+ _ => canonical_segments.push(normalized_segment),
466
+ }
467
+ }
468
+
469
+ Ok(canonical_segments)
470
+ }
471
+
472
+ fn ensure_canonical_path_len(path: &str) -> PathResult<()> {
473
+ if path.len() > MAX_CANONICAL_PATH_BYTES {
474
+ Err(PathError::PathTooLong)
475
+ } else {
476
+ Ok(())
477
+ }
478
+ }
479
+
480
+ fn ensure_raw_path_input_len(path: &str) -> PathResult<()> {
481
+ if path.len() > MAX_RAW_PATH_INPUT_BYTES {
482
+ Err(PathError::RawPathInputTooLong)
483
+ } else {
484
+ Ok(())
485
+ }
486
+ }
487
+
488
+ fn ensure_canonical_segment_len(segment: &str) -> PathResult<()> {
489
+ if segment.len() > MAX_CANONICAL_PATH_SEGMENT_BYTES {
490
+ Err(PathError::SegmentTooLong)
491
+ } else {
492
+ Ok(())
493
+ }
494
+ }
495
+
496
+ pub(crate) fn parse_file_path(path: &str) -> Result<ParsedFilePath, LixError> {
497
+ parse_file_path_impl(path).map_err(PathError::into_lix_error)
498
+ }
499
+
500
+ fn parse_file_path_impl(path: &str) -> PathResult<ParsedFilePath> {
501
+ let normalized_path = normalize_file_path_impl(path)?;
502
+ let segments = normalized_path
503
+ .split('/')
504
+ .filter(|segment| !segment.is_empty())
505
+ .collect::<Vec<_>>();
506
+ let file_name = segments
507
+ .last()
508
+ .ok_or(PathError::InvalidRootUsage)?
509
+ .to_string();
510
+ let directory_path = if segments.len() > 1 {
511
+ Some(NormalizedDirectoryPath::from_normalized(format!(
512
+ "/{}/",
513
+ segments[..segments.len() - 1].join("/")
514
+ )))
515
+ } else {
516
+ None
517
+ };
518
+
519
+ Ok(ParsedFilePath {
520
+ normalized_path: NormalizedFilePath::from_normalized(normalized_path),
521
+ directory_path,
522
+ name: file_name,
523
+ })
524
+ }
525
+
526
/// Lists the directory paths that are proper ancestors of `path`, ordered
/// from the shallowest to the deepest. The root `/` is never included.
pub(crate) fn directory_ancestor_paths(path: &str) -> Vec<String> {
    ancestor_directory_paths(path)
}

/// Builds the ancestor list by accumulating one segment at a time.
///
/// Empty segments are ignored; the last non-empty segment names the entry
/// itself and is therefore excluded. Paths with fewer than two segments
/// yield an empty list.
fn ancestor_directory_paths(path: &str) -> Vec<String> {
    let pieces: Vec<&str> = path.split('/').filter(|piece| !piece.is_empty()).collect();
    let Some((_entry, parents)) = pieces.split_last() else {
        return Vec::new();
    };

    let mut prefix = String::from("/");
    parents
        .iter()
        .map(|piece| {
            prefix.push_str(piece);
            prefix.push('/');
            prefix.clone()
        })
        .collect()
}
548
+
549
/// Returns the directory that contains `path`, written as `/…/`.
///
/// Empty segments are ignored. Yields `None` when `path` has zero or one
/// non-empty segment, i.e. for the root and for top-level entries.
pub(crate) fn parent_directory_path(path: &str) -> Option<String> {
    let parts: Vec<&str> = path.split('/').filter(|part| !part.is_empty()).collect();
    match parts.split_last() {
        Some((_, parents)) if !parents.is_empty() => Some(format!("/{}/", parents.join("/"))),
        _ => None,
    }
}
560
+
561
/// Returns the last non-empty `/`-separated segment of `path`, or `None`
/// when the path contains no non-empty segment (for example `"/"` or `""`).
pub(crate) fn directory_name_from_path(path: &str) -> Option<String> {
    // Walk segments from the right and take the first one that has content.
    path.rsplit('/')
        .find(|part| !part.is_empty())
        .map(str::to_owned)
}
568
+
569
/// Test-only helper that appends a normalized segment `name` to the directory
/// `parent_path`, producing a directory path that ends in `/`.
///
/// # Errors
/// Returns the converted `PathError` when `name` fails segment normalization,
/// or `PathError::InvalidDirectoryParentPath` (converted) when `parent_path`
/// does not both start and end with `/`.
#[cfg(test)]
pub(crate) fn compose_directory_path(parent_path: &str, name: &str) -> Result<String, LixError> {
    let segment = match normalize_path_segment_impl(name) {
        Ok(segment) => segment,
        Err(error) => return Err(error.into_lix_error()),
    };

    if parent_path == "/" {
        return Ok(format!("/{segment}/"));
    }

    let is_directory_shaped = parent_path.starts_with('/') && parent_path.ends_with('/');
    if !is_directory_shaped {
        return Err(PathError::InvalidDirectoryParentPath.into_lix_error());
    }
    Ok(format!("{parent_path}{segment}/"))
}
580
+
581
+ #[cfg(test)]
582
+ mod tests {
583
+ use super::*;
584
+ use iref::iri::Path as IriPath;
585
+
586
/// Selects which normalizer `normalize_with_kind` runs for a fixture.
#[derive(Clone, Copy, Debug)]
enum NormalizationKind {
    /// Normalize the input as a file path.
    File,
    /// Normalize the input as a directory path.
    Directory,
    /// Normalize the input as a single path segment.
    Segment,
}
592
+
593
/// Selects whether a `LixProfileFixture` is checked with the file or the
/// directory normalizer.
#[derive(Clone, Copy, Debug)]
enum LixFixtureKind {
    /// Check the input with the file-path normalizer.
    File,
    /// Check the input with the directory-path normalizer.
    Directory,
}
598
+
599
/// A labelled input used to compare the Lix file-path normalizer with the
/// `iref` path parser.
#[derive(Clone, Copy, Debug)]
struct RfcFixture {
    /// Human-readable description printed in assertion messages.
    label: &'static str,
    /// Raw path handed to both the oracle and the normalizer.
    input: &'static str,
}
604
+
605
/// A fixture that pins both the `iref` oracle verdict and the exact Lix
/// normalizer outcome for one input.
#[derive(Clone, Copy, Debug)]
struct LixProfileFixture {
    /// Human-readable description printed in assertion messages.
    label: &'static str,
    /// Whether the input is checked as a file or a directory path.
    kind: LixFixtureKind,
    /// Raw path handed to both the oracle and the normalizer.
    input: &'static str,
    /// Expected result of `iri_oracle_accepts(input)`.
    oracle_accepts: bool,
    /// Expected normalizer outcome: the normalized text or the exact error.
    expected: Result<&'static str, PathError>,
}
613
+
614
/// A fixture that pins the normalized output for an input that is expected
/// to normalize successfully.
#[derive(Clone, Copy, Debug)]
struct NormalizationFixture {
    /// Human-readable description printed in assertion messages.
    label: &'static str,
    /// Which normalizer to run on `input`.
    kind: NormalizationKind,
    /// Raw input text.
    input: &'static str,
    /// Expected normalized text.
    expected: &'static str,
}
621
+
622
+ fn assert_path_error<T: fmt::Debug>(result: PathResult<T>, expected: PathError) {
623
+ assert_eq!(result.unwrap_err(), expected);
624
+ }
625
+
626
/// Reports whether `iref`'s IRI path type (`iref::iri::Path`) accepts `path`.
/// The tests use this as an independent syntax oracle.
fn iri_oracle_accepts(path: &str) -> bool {
    IriPath::new(path).is_ok()
}
629
+
630
+ fn normalize_with_kind(kind: NormalizationKind, input: &str) -> Result<String, LixError> {
631
+ match kind {
632
+ NormalizationKind::File => {
633
+ normalize_file_path_impl(input).map_err(PathError::into_lix_error)
634
+ }
635
+ NormalizationKind::Directory => normalize_directory_path(input),
636
+ NormalizationKind::Segment => normalize_path_segment(input),
637
+ }
638
+ }
639
+
640
/// Test-local wrapper: normalizes `path` as a file path and converts any
/// `PathError` into a `LixError`.
fn normalize_file_path(path: &str) -> Result<String, LixError> {
    normalize_file_path_impl(path).map_err(PathError::into_lix_error)
}
643
+
644
+ fn assert_lix_profile_fixture(fixture: LixProfileFixture) {
645
+ assert_eq!(
646
+ iri_oracle_accepts(fixture.input),
647
+ fixture.oracle_accepts,
648
+ "iref oracle mismatch for {} ({})",
649
+ fixture.label,
650
+ fixture.input
651
+ );
652
+
653
+ match fixture.kind {
654
+ LixFixtureKind::File => match fixture.expected {
655
+ Ok(expected) => assert_eq!(
656
+ normalize_file_path(fixture.input).as_deref(),
657
+ Ok(expected),
658
+ "unexpected file result for {} ({})",
659
+ fixture.label,
660
+ fixture.input
661
+ ),
662
+ Err(expected) => {
663
+ assert_path_error(normalize_file_path_impl(fixture.input), expected)
664
+ }
665
+ },
666
+ LixFixtureKind::Directory => match fixture.expected {
667
+ Ok(expected) => assert_eq!(
668
+ normalize_directory_path(fixture.input).as_deref(),
669
+ Ok(expected),
670
+ "unexpected directory result for {} ({})",
671
+ fixture.label,
672
+ fixture.input
673
+ ),
674
+ Err(expected) => {
675
+ assert_path_error(normalize_directory_path_impl(fixture.input), expected)
676
+ }
677
+ },
678
+ }
679
+ }
680
+
681
/// Inputs that both the `iref` oracle and the Lix file-path normalizer are
/// expected to accept.
const RFC_POSITIVE_FIXTURES: &[RfcFixture] = &[
    RfcFixture {
        label: "absolute unicode file path",
        input: "/unicodé/段落.md",
    },
    RfcFixture {
        label: "path with pchar punctuation",
        input: "/docs/hello:world@x!$&'()*+,;=.md",
    },
];
691
+
692
/// Inputs that both the `iref` oracle and the Lix file-path normalizer are
/// expected to reject.
const RFC_NEGATIVE_FIXTURES: &[RfcFixture] = &[
    RfcFixture {
        label: "invalid percent triplet",
        input: "/docs/%zz.md",
    },
    RfcFixture {
        label: "truncated percent triplet",
        input: "/docs/%2",
    },
    RfcFixture {
        label: "raw space is not allowed in an ipath",
        input: "/docs/file name.md",
    },
    RfcFixture {
        label: "raw fragment delimiter is not part of the path grammar",
        input: "/docs/#hash",
    },
    RfcFixture {
        label: "private use code point is excluded from ucschar",
        input: "/docs/\u{E000}.md",
    },
];
714
+
715
/// Inputs accepted by the oracle for which the Lix normalizer must return
/// the pinned normalized text.
const LIX_PROFILE_POSITIVE_FIXTURES: &[LixProfileFixture] = &[
    LixProfileFixture {
        label: "root directory is representable",
        kind: LixFixtureKind::Directory,
        input: "/",
        oracle_accepts: true,
        expected: Ok("/"),
    },
    LixProfileFixture {
        label: "directory paths require trailing slash",
        kind: LixFixtureKind::Directory,
        input: "/docs/",
        oracle_accepts: true,
        expected: Ok("/docs/"),
    },
    LixProfileFixture {
        label: "file paths stay slashless at the end",
        kind: LixFixtureKind::File,
        input: "/docs/readme.md",
        oracle_accepts: true,
        expected: Ok("/docs/readme.md"),
    },
];
738
+
739
/// Inputs the oracle accepts but the Lix normalizer must reject, each pinned
/// to the exact `PathError` it is expected to produce.
const LIX_PROFILE_NEGATIVE_FIXTURES: &[LixProfileFixture] = &[
    LixProfileFixture {
        label: "relative-looking path is valid RFC syntax but not a Lix path",
        kind: LixFixtureKind::File,
        input: "docs/readme.md",
        oracle_accepts: true,
        expected: Err(PathError::MissingLeadingSlash),
    },
    LixProfileFixture {
        label: "file paths reject trailing slash even though RFC syntax allows it",
        kind: LixFixtureKind::File,
        input: "/docs/",
        oracle_accepts: true,
        expected: Err(PathError::UnexpectedTrailingSlashOnFilePath),
    },
    LixProfileFixture {
        label: "directory paths reject missing trailing slash even though RFC syntax allows it",
        kind: LixFixtureKind::Directory,
        input: "/docs",
        oracle_accepts: true,
        expected: Err(PathError::MissingTrailingSlashOnDirectoryPath),
    },
    LixProfileFixture {
        label: "empty segments are valid RFC paths but banned by the Lix profile",
        kind: LixFixtureKind::File,
        input: "/docs//guide.md",
        oracle_accepts: true,
        expected: Err(PathError::EmptySegment),
    },
    LixProfileFixture {
        label: "root is not a valid file path",
        kind: LixFixtureKind::File,
        input: "/",
        oracle_accepts: true,
        expected: Err(PathError::InvalidRootUsage),
    },
    LixProfileFixture {
        label: "percent-encoded spaces are valid URI syntax but not Lix segment identity",
        kind: LixFixtureKind::File,
        input: "/docs/%20notes.md",
        oracle_accepts: true,
        expected: Err(PathError::InvalidPathSegmentCodePoint),
    },
    LixProfileFixture {
        label: "bidi formatting is rejected by the Lix validator even though iref accepts it",
        kind: LixFixtureKind::File,
        input: "/docs/\u{202E}.md",
        oracle_accepts: true,
        expected: Err(PathError::InvalidPathSegmentCodePoint),
    },
    LixProfileFixture {
        label: "dot segments are valid RFC syntax but banned by the Lix profile",
        kind: LixFixtureKind::File,
        input: "/docs/../guide.md",
        oracle_accepts: true,
        expected: Err(PathError::DotSegment),
    },
];
797
+
798
/// Inputs that must normalize successfully, each pinned to its expected
/// canonical text.
const NORMALIZATION_FIXTURES: &[NormalizationFixture] = &[
    NormalizationFixture {
        label: "nfc composition happens before validation",
        kind: NormalizationKind::File,
        input: "/Cafe\u{0301}.md",
        expected: "/Café.md",
    },
    NormalizationFixture {
        label: "percent-encoded segment text is decoded before storage",
        kind: NormalizationKind::Directory,
        input: "/docs/%43afe%CC%81/",
        expected: "/docs/Café/",
    },
    NormalizationFixture {
        label: "unreserved percent encoding is decoded",
        kind: NormalizationKind::File,
        input: "/docs/%7e%41.md",
        expected: "/docs/~A.md",
    },
    NormalizationFixture {
        label: "root survives directory normalization",
        kind: NormalizationKind::Directory,
        input: "/",
        expected: "/",
    },
    NormalizationFixture {
        label: "segment normalization decodes unreserved percent triplets",
        kind: NormalizationKind::Segment,
        input: "%7ehello",
        expected: "~hello",
    },
];
830
+
831
/// Every positive RFC fixture must be accepted by both the oracle and Lix.
#[test]
fn rfc_positive_path_fixtures_agree_with_iref() {
    for &RfcFixture { label, input } in RFC_POSITIVE_FIXTURES {
        assert!(
            iri_oracle_accepts(input),
            "iref should accept {} ({})",
            label,
            input
        );
        assert!(
            normalize_file_path_impl(input).is_ok(),
            "lix should accept {} ({})",
            label,
            input
        );
    }
}
848
+
849
/// Every negative RFC fixture must be rejected by both the oracle and Lix.
#[test]
fn rfc_negative_path_fixtures_agree_with_iref() {
    for &RfcFixture { label, input } in RFC_NEGATIVE_FIXTURES {
        assert!(
            !iri_oracle_accepts(input),
            "iref should reject {} ({})",
            label,
            input
        );
        assert!(
            normalize_file_path_impl(input).is_err(),
            "lix should reject {} ({})",
            label,
            input
        );
    }
}
866
+
867
/// Runs the shared fixture check over every positive Lix profile fixture.
#[test]
fn lix_profile_positive_fixtures_are_pinned() {
    LIX_PROFILE_POSITIVE_FIXTURES
        .iter()
        .copied()
        .for_each(assert_lix_profile_fixture);
}
873
+
874
/// Runs the shared fixture check over every negative Lix profile fixture.
#[test]
fn lix_profile_negative_fixtures_document_divergence_from_the_oracle() {
    LIX_PROFILE_NEGATIVE_FIXTURES
        .iter()
        .copied()
        .for_each(assert_lix_profile_fixture);
}
880
+
881
/// Each normalization fixture must produce exactly its pinned canonical text.
#[test]
fn normalization_fixture_table_covers_canonicalization_rules() {
    for &NormalizationFixture {
        label,
        kind,
        input,
        expected,
    } in NORMALIZATION_FIXTURES
    {
        let normalized = normalize_with_kind(kind, input);
        assert_eq!(
            normalized.as_deref(),
            Ok(expected),
            "unexpected normalized value for {} ({})",
            label,
            input
        );
    }
}
893
+
894
/// A sample of well-formed file paths must all normalize without error.
#[test]
fn accepts_normalized_file_paths_with_unicode_and_percent_encoding() {
    let valid_paths = [
        "/docs/readme.md",
        "/a/b/c.txt",
        "/dash--path",
        "/unicodé/段落.md",
        "/docs/hello:world@x!$&'()*+,;=.md",
    ];
    for path in valid_paths {
        let outcome = normalize_file_path(path);
        assert!(outcome.is_ok(), "expected valid path {path}");
    }
}
909
+
910
/// Structurally malformed file paths map to their specific `PathError`.
#[test]
fn rejects_structural_file_path_anomalies() {
    let cases = [
        ("/", PathError::InvalidRootUsage),
        ("/trailing/", PathError::UnexpectedTrailingSlashOnFilePath),
        ("no-leading", PathError::MissingLeadingSlash),
        ("/bad//double", PathError::EmptySegment),
    ];
    for (path, expected) in cases {
        assert_path_error(normalize_file_path_impl(path), expected);
    }
}
926
+
927
/// Literal and percent-encoded `.` / `..` segments are rejected in file paths.
#[test]
fn rejects_file_paths_with_dot_segments() {
    let dotted_paths = [
        "/docs/./file",
        "/docs/../file",
        "/docs/%2e/file",
        "/docs/%2E%2E/file",
    ];
    for dotted in dotted_paths {
        assert_path_error(normalize_file_path_impl(dotted), PathError::DotSegment);
    }
}
938
+
939
/// Raw `?`, `#` and space characters are rejected as invalid code points.
#[test]
fn rejects_file_paths_with_invalid_characters() {
    let invalid_paths = ["/docs/file?.md", "/docs/#hash", "/docs/file name.md"];
    for invalid in invalid_paths {
        let outcome = normalize_file_path_impl(invalid);
        assert_path_error(outcome, PathError::InvalidPathSegmentCodePoint);
    }
}
948
+
949
/// A segment exactly at the limit is accepted; one byte more is rejected, as
/// is a path of short segments whose total length exceeds the path limit.
#[test]
fn rejects_file_paths_and_segments_over_length_limits() {
    // Exactly at the segment limit: accepted unchanged.
    let max_segment = "a".repeat(MAX_CANONICAL_PATH_SEGMENT_BYTES);
    let max_segment_path = format!("/{max_segment}");
    let at_limit = normalize_file_path(&max_segment_path);
    assert_eq!(at_limit.as_deref(), Ok(max_segment_path.as_str()));

    // One byte over the segment limit: rejected as a path and as a segment.
    let oversized_segment = "a".repeat(MAX_CANONICAL_PATH_SEGMENT_BYTES + 1);
    let oversized_segment_path = format!("/{oversized_segment}");
    assert_path_error(
        normalize_file_path_impl(&oversized_segment_path),
        PathError::SegmentTooLong,
    );
    assert_path_error(
        normalize_path_segment_impl(&oversized_segment),
        PathError::SegmentTooLong,
    );

    // Keep adding short segments until the whole path exceeds the path limit.
    let mut parts = Vec::new();
    let mut total_len = 1usize;
    while total_len <= MAX_CANONICAL_PATH_BYTES {
        parts.push("abcd");
        total_len = 1 + parts.join("/").len();
    }
    let oversized_path = format!("/{}", parts.join("/"));
    assert_path_error(
        normalize_file_path_impl(&oversized_path),
        PathError::PathTooLong,
    );
}
979
+
980
/// U+E000 and U+FDD0 are rejected as invalid segment code points.
#[test]
fn rejects_file_paths_with_private_use_and_noncharacter_code_points() {
    let rejected = ["/docs/\u{E000}.md", "/docs/\u{FDD0}.md"];
    for candidate in rejected {
        let outcome = normalize_file_path_impl(candidate);
        assert_path_error(outcome, PathError::InvalidPathSegmentCodePoint);
    }
}
989
+
990
/// U+200E and U+202E are rejected as invalid segment code points.
#[test]
fn rejects_file_paths_with_bidi_formatting_characters() {
    let rejected = ["/docs/\u{200E}.md", "/docs/\u{202E}.md"];
    for candidate in rejected {
        let outcome = normalize_file_path_impl(candidate);
        assert_path_error(outcome, PathError::InvalidPathSegmentCodePoint);
    }
}
999
+
1000
/// Invisible and default-ignorable characters inside a segment are rejected.
#[test]
fn rejects_default_ignorable_and_invisible_segment_characters() {
    let rejected = [
        "/docs/a\u{200B}b.md", // ZERO WIDTH SPACE
        "/docs/a\u{200C}b.md", // ZERO WIDTH NON-JOINER
        "/docs/a\u{200D}b.md", // ZERO WIDTH JOINER
        "/docs/a\u{2060}b.md", // WORD JOINER
        "/docs/a\u{00AD}b.md", // SOFT HYPHEN
        "/docs/a\u{034F}b.md", // COMBINING GRAPHEME JOINER
        "/docs/a\u{180E}b.md", // MONGOLIAN VOWEL SEPARATOR
        "/docs/a\u{FEFF}b.md", // ZERO WIDTH NO-BREAK SPACE
    ];
    for candidate in rejected {
        let outcome = normalize_file_path_impl(candidate);
        assert_path_error(outcome, PathError::InvalidPathSegmentCodePoint);
    }
}
1018
+
1019
/// Unicode separators and a segment that starts with a combining mark are
/// rejected as invalid segment code points.
#[test]
fn rejects_unicode_separators_and_leading_combining_marks() {
    let rejected = [
        "/docs/a\u{00A0}b.md", // NO-BREAK SPACE
        "/docs/a\u{2028}b.md", // LINE SEPARATOR
        "/docs/a\u{2029}b.md", // PARAGRAPH SEPARATOR
        "/docs/\u{0301}.md",   // COMBINING ACUTE ACCENT
    ];
    for candidate in rejected {
        let outcome = normalize_file_path_impl(candidate);
        assert_path_error(outcome, PathError::InvalidPathSegmentCodePoint);
    }
}
1033
+
1034
/// Valid percent triplets decode to canonical text; malformed or truncated
/// triplets fail with `InvalidPercentEncoding`.
#[test]
fn validates_percent_encoding_in_file_paths() {
    let decoded = normalize_file_path("/docs/%43afe%CC%81.md");
    assert_eq!(decoded.as_deref(), Ok("/docs/Café.md"));

    let malformed = ["/docs/%zz.md", "/docs/abc%.md", "/docs/abc%2.md"];
    for path in malformed {
        assert_path_error(
            normalize_file_path_impl(path),
            PathError::InvalidPercentEncoding,
        );
    }
}
1053
+
1054
/// The segment limit is measured on decoded text: `%61` repeated up to the
/// limit is accepted and decodes to `a`s, one more triplet is rejected.
#[test]
fn applies_segment_length_limit_to_canonical_text_not_percent_encoded_boundary_spelling() {
    let limit = MAX_CANONICAL_PATH_SEGMENT_BYTES;

    let encoded_at_limit = "%61".repeat(limit);
    let decoded_at_limit = "a".repeat(limit);
    let file_result = normalize_file_path(&format!("/{encoded_at_limit}"));
    assert_eq!(
        file_result.as_deref(),
        Ok(format!("/{decoded_at_limit}").as_str())
    );
    let directory_result = normalize_directory_path(&format!("/{encoded_at_limit}/"));
    assert_eq!(
        directory_result.as_deref(),
        Ok(format!("/{decoded_at_limit}/").as_str())
    );

    let encoded_over_limit = "%61".repeat(limit + 1);
    assert_path_error(
        normalize_file_path_impl(&format!("/{encoded_over_limit}")),
        PathError::SegmentTooLong,
    );
    assert_path_error(
        normalize_directory_path_impl(&format!("/{encoded_over_limit}/")),
        PathError::SegmentTooLong,
    );
}
1077
+
1078
/// A one-mebibyte segment is rejected with `RawPathInputTooLong` by both the
/// file and the directory normalizer.
#[test]
fn rejects_raw_path_input_over_length_budget_before_unicode_processing() {
    let huge_segment = "a".repeat(1024 * 1024);

    let as_file = format!("/{}", huge_segment);
    assert_path_error(
        normalize_file_path_impl(&as_file),
        PathError::RawPathInputTooLong,
    );

    let as_directory = format!("/{}/", huge_segment);
    assert_path_error(
        normalize_directory_path_impl(&as_directory),
        PathError::RawPathInputTooLong,
    );
}
1092
+
1093
/// Percent-encoding a forbidden byte or code point does not smuggle it into a
/// file path: each input fails with the listed `PathError`.
#[test]
fn rejects_percent_encoded_forbidden_code_points_in_file_paths() {
    let invalid = PathError::InvalidPathSegmentCodePoint;
    let cases = [
        ("/docs/%00evil.md", PathError::NulByte),
        ("/docs/%2Fevil.md", PathError::SlashInSegment),
        ("/docs/%5Cevil.md", PathError::Backslash),
        ("/docs/%25evil.md", invalid),
        ("/docs/%3Fevil.md", invalid),
        ("/docs/%23evil.md", invalid),
        ("/docs/%E2%80%AEevil.md", invalid),
        ("/docs/%E2%80%8Eevil.md", invalid),
        ("/docs/%E2%81%A0evil.md", invalid),
        ("/docs/%C2%ADevil.md", invalid),
        ("/docs/%CD%8Fevil.md", invalid),
        ("/docs/%E1%A0%8Eevil.md", invalid),
        ("/docs/%EF%BB%BFevil.md", invalid),
        ("/docs/%EF%B7%90evil.md", invalid),
        ("/docs/%EE%80%80evil.md", invalid),
        ("/docs/%FFevil.md", invalid),
    ];
    for (path, expected) in cases {
        assert_path_error(normalize_file_path_impl(path), expected);
    }
}
1143
+
1144
/// Percent-encoding a forbidden byte or code point does not smuggle it into a
/// directory path: each input fails with the listed `PathError`.
#[test]
fn rejects_percent_encoded_forbidden_code_points_in_directory_paths() {
    let invalid = PathError::InvalidPathSegmentCodePoint;
    let cases = [
        ("/docs/%00evil/", PathError::NulByte),
        ("/docs/%2Fevil/", PathError::SlashInSegment),
        ("/docs/%5Cevil/", PathError::Backslash),
        ("/docs/%E2%80%AEevil/", invalid),
        ("/docs/%E2%80%8Eevil/", invalid),
        ("/docs/%E2%81%A0evil/", invalid),
        ("/docs/%EF%BB%BFevil/", invalid),
        ("/docs/%EF%B7%90evil/", invalid),
        ("/docs/%EE%80%80evil/", invalid),
        ("/docs/%FFevil/", invalid),
    ];
    for (path, expected) in cases {
        assert_path_error(normalize_directory_path_impl(path), expected);
    }
}
1179
+
1180
/// Percent-encoded `~`, `A` and `.` decode to their literal characters, while
/// an encoded slash is rejected rather than kept encoded.
#[test]
fn canonicalizes_percent_encoding_in_file_paths() {
    let decoded = normalize_file_path("/docs/%7e%41%2e%2E.md");
    assert_eq!(decoded.as_deref(), Ok("/docs/~A...md"));

    let encoded_slash = normalize_file_path_impl("/docs/%2fkept%3aencoded");
    assert_path_error(encoded_slash, PathError::SlashInSegment);
}
1191
+
1192
/// Normalizing an already-normalized path returns the same text.
#[test]
fn normalization_is_stable_on_renormalization() {
    let first_pass = normalize_file_path("/docs/%7e/%41.md").expect("first normalization");
    let second_pass = normalize_file_path(&first_pass).expect("second normalization");
    assert_eq!(first_pass, second_pass);
}
1198
+
1199
/// Well-formed directory paths are accepted; malformed ones fail with the
/// specific `PathError` listed for each input.
#[test]
fn accepts_and_rejects_directory_paths_like_legacy_rules() {
    let accepted = ["/", "/docs/", "/docs/guides/", "/unicodé/章节/"];
    for path in accepted {
        assert!(
            normalize_directory_path(path).is_ok(),
            "expected valid directory path {path}"
        );
    }

    let rejected = [
        ("/file.md", PathError::MissingTrailingSlashOnDirectoryPath),
        ("/docs", PathError::MissingTrailingSlashOnDirectoryPath),
        ("/docs/ ", PathError::MissingTrailingSlashOnDirectoryPath),
        ("/docs/ /", PathError::InvalidPathSegmentCodePoint),
        ("no-leading", PathError::MissingLeadingSlash),
        ("/docs/%zz/", PathError::InvalidPercentEncoding),
    ];
    for (path, expected) in rejected {
        assert_path_error(normalize_directory_path_impl(path), expected);
    }
}
1232
+
1233
/// A percent-encoded, decomposed directory segment is stored decoded and
/// composed.
#[test]
fn canonicalizes_directory_paths() {
    let canonical = normalize_directory_path("/docs/%43afe%CC%81/");
    assert_eq!(canonical.as_deref(), Ok("/docs/Café/"));
}
1240
+
1241
/// A directory segment exactly at the limit is accepted; one byte more is
/// rejected, as is a directory path whose total length exceeds the path limit.
#[test]
fn rejects_directory_paths_and_segments_over_length_limits() {
    // Exactly at the segment limit: accepted unchanged.
    let max_segment = "a".repeat(MAX_CANONICAL_PATH_SEGMENT_BYTES);
    let max_segment_path = format!("/{max_segment}/");
    let at_limit = normalize_directory_path(&max_segment_path);
    assert_eq!(at_limit.as_deref(), Ok(max_segment_path.as_str()));

    // One byte over the segment limit: rejected.
    let oversized_segment = "a".repeat(MAX_CANONICAL_PATH_SEGMENT_BYTES + 1);
    let oversized_segment_path = format!("/{oversized_segment}/");
    assert_path_error(
        normalize_directory_path_impl(&oversized_segment_path),
        PathError::SegmentTooLong,
    );

    // Keep adding short segments until the whole path exceeds the path limit.
    let mut parts = Vec::new();
    let mut total_len = 1usize;
    while total_len <= MAX_CANONICAL_PATH_BYTES {
        parts.push("abcd");
        total_len = 2 + parts.join("/").len();
    }
    let oversized_path = format!("/{}/", parts.join("/"));
    assert_path_error(
        normalize_directory_path_impl(&oversized_path),
        PathError::PathTooLong,
    );
}
1267
+
1268
/// Literal and percent-encoded `.` / `..` segments are rejected in directory
/// paths.
#[test]
fn rejects_directory_paths_with_dot_segments() {
    let dotted_paths = ["/docs/./", "/docs/../", "/docs/%2e/", "/docs/%2E%2E/"];
    for dotted in dotted_paths {
        let outcome = normalize_directory_path_impl(dotted);
        assert_path_error(outcome, PathError::DotSegment);
    }
}
1274
+
1275
/// `/` parses into a `NormalizedDirectoryPath` that reads back as `/` and
/// equals one built directly from the normalized text.
#[test]
fn represents_root_as_a_normalized_directory_path() {
    let parsed_root = NormalizedDirectoryPath::try_from_path("/").expect("root path");
    assert_eq!(parsed_root.as_str(), "/");

    let prebuilt_root = NormalizedDirectoryPath::from_normalized("/".to_string());
    assert_eq!(parsed_root, prebuilt_root);
}
1284
+
1285
/// Neither the root nor a top-level entry has a parent directory.
#[test]
fn root_parent_and_top_level_parent_are_absent() {
    for path in ["/", "/top-level.txt"] {
        assert_eq!(parent_directory_path(path), None);
    }
}
1290
+
1291
/// Composing a name under the root yields `/name/`.
#[test]
fn compose_directory_path_under_root() {
    let composed = compose_directory_path("/", "docs");
    assert_eq!(composed.as_deref(), Ok("/docs/"));
}
1295
+
1296
/// Pins the public error `code` and `hint()` text for a representative set of
/// path failures.
#[test]
fn exposes_stable_lix_errors_with_hints() {
    // Missing leading slash.
    let no_leading_slash = normalize_file_path("docs/readme.md").expect_err("leading slash");
    assert_eq!(no_leading_slash.code, "LIX_ERROR_PATH_MISSING_LEADING_SLASH");
    assert_eq!(no_leading_slash.hint(), Some("prefix the path with '/'"));

    // Malformed percent triplet.
    let invalid_percent = normalize_file_path("/docs/%zz.md").expect_err("bad percent");
    assert_eq!(invalid_percent.code, "LIX_ERROR_PATH_INVALID_PERCENT_ENCODING");
    assert_eq!(
        invalid_percent.hint(),
        Some("use valid percent triplets only for URI boundary input; '%' is not allowed in canonical path segments")
    );

    // Root used as a file path.
    let root_as_file = normalize_file_path("/").expect_err("root as file");
    assert_eq!(root_as_file.code, "LIX_ERROR_PATH_INVALID_ROOT_USAGE");
    assert_eq!(
        root_as_file.hint(),
        Some("use '/' as a directory path, never as a file path")
    );

    // Segment one byte over the canonical segment limit.
    let oversized_segment_path = format!("/{}", "a".repeat(MAX_CANONICAL_PATH_SEGMENT_BYTES + 1));
    let segment_too_long = normalize_file_path(&oversized_segment_path).expect_err("long segment");
    assert_eq!(segment_too_long.code, "LIX_ERROR_PATH_SEGMENT_TOO_LONG");
    assert_eq!(
        segment_too_long.hint(),
        Some("keep each canonical path segment at or below 255 bytes")
    );

    // Raw input over the raw-input budget.
    let oversized_input = format!("/{}", "a".repeat(MAX_RAW_PATH_INPUT_BYTES + 1));
    let input_too_long = normalize_file_path(&oversized_input).expect_err("long raw input");
    assert_eq!(input_too_long.code, "LIX_ERROR_PATH_INPUT_TOO_LONG");
    assert_eq!(
        input_too_long.hint(),
        Some("keep raw path input at or below 16384 bytes")
    );
}
1336
+ }