diffctx 1.8.1__tar.gz → 1.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. {diffctx-1.8.1 → diffctx-1.9.1}/CHANGELOG.md +29 -0
  2. {diffctx-1.8.1 → diffctx-1.9.1}/PKG-INFO +1 -1
  3. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/filtering.rs +2 -3
  4. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/git.rs +18 -2
  5. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/interval.rs +6 -4
  6. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/memory_pipeline.rs +16 -1
  7. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/pipeline.rs +63 -2
  8. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/postpass.rs +7 -2
  9. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/pybridge.rs +23 -0
  10. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/render.rs +106 -11
  11. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/signatures.rs +5 -1
  12. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/tokenizer.rs +1 -0
  13. {diffctx-1.8.1 → diffctx-1.9.1}/pyproject.toml +1 -1
  14. diffctx-1.9.1/src/diffctx/version.py +1 -0
  15. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/writer.py +23 -0
  16. diffctx-1.8.1/src/diffctx/version.py +0 -1
  17. {diffctx-1.8.1 → diffctx-1.9.1}/LICENSE +0 -0
  18. {diffctx-1.8.1 → diffctx-1.9.1}/README.md +0 -0
  19. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/Cargo.lock +0 -0
  20. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/Cargo.toml +0 -0
  21. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/analytics.rs +0 -0
  22. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/candidate_files.rs +0 -0
  23. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/analytics.rs +0 -0
  24. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/bm25.rs +0 -0
  25. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/budget.rs +0 -0
  26. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/category_weights.rs +0 -0
  27. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/edge_weights.rs +0 -0
  28. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/env_overrides.rs +0 -0
  29. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/extensions.rs +0 -0
  30. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/filtering.rs +0 -0
  31. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/fragmentation.rs +0 -0
  32. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/git.rs +0 -0
  33. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/graph_filtering.rs +0 -0
  34. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/importance.rs +0 -0
  35. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/limits.rs +0 -0
  36. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/mod.rs +0 -0
  37. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/mode.rs +0 -0
  38. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/needs.rs +0 -0
  39. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/parsers.rs +0 -0
  40. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/render.rs +0 -0
  41. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/scoring.rs +0 -0
  42. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/selection.rs +0 -0
  43. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/tokenization.rs +0 -0
  44. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/config/weights.rs +0 -0
  45. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/core.rs +0 -0
  46. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/discovery.rs +0 -0
  47. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/base.rs +0 -0
  48. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/config_edges/build_system.rs +0 -0
  49. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/config_edges/cicd.rs +0 -0
  50. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/config_edges/docker.rs +0 -0
  51. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/config_edges/generic.rs +0 -0
  52. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/config_edges/helm.rs +0 -0
  53. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/config_edges/kubernetes.rs +0 -0
  54. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/config_edges/mod.rs +0 -0
  55. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/document/mod.rs +0 -0
  56. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/history/cochange.rs +0 -0
  57. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/history/mod.rs +0 -0
  58. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/mod.rs +0 -0
  59. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/ansible.rs +0 -0
  60. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/bazel.rs +0 -0
  61. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/c_family.rs +0 -0
  62. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/cargo_edges.rs +0 -0
  63. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/clojure.rs +0 -0
  64. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/css.rs +0 -0
  65. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/dart.rs +0 -0
  66. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/dbt.rs +0 -0
  67. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/dotnet.rs +0 -0
  68. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/elixir.rs +0 -0
  69. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/erlang.rs +0 -0
  70. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/go.rs +0 -0
  71. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/graphql.rs +0 -0
  72. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/haskell.rs +0 -0
  73. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/javascript.rs +0 -0
  74. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/julia.rs +0 -0
  75. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/jvm.rs +0 -0
  76. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/latex.rs +0 -0
  77. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/lua.rs +0 -0
  78. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/mod.rs +0 -0
  79. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/nim.rs +0 -0
  80. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/nix.rs +0 -0
  81. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/ocaml.rs +0 -0
  82. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/openapi.rs +0 -0
  83. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/perl.rs +0 -0
  84. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/php.rs +0 -0
  85. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/prisma.rs +0 -0
  86. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/protobuf.rs +0 -0
  87. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/python.rs +0 -0
  88. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/r_lang.rs +0 -0
  89. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/ruby.rs +0 -0
  90. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/rust_lang.rs +0 -0
  91. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/shell.rs +0 -0
  92. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/sql.rs +0 -0
  93. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/swift.rs +0 -0
  94. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/tags.rs +0 -0
  95. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/terraform.rs +0 -0
  96. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/semantic/zig.rs +0 -0
  97. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/similarity/lexical.rs +0 -0
  98. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/similarity/mod.rs +0 -0
  99. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/structural/containment.rs +0 -0
  100. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/structural/mod.rs +0 -0
  101. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/structural/sibling.rs +0 -0
  102. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/edges/structural/testing.rs +0 -0
  103. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/fragmentation.rs +0 -0
  104. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/graph.rs +0 -0
  105. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/graph_export.rs +0 -0
  106. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/languages.rs +0 -0
  107. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/lib.rs +0 -0
  108. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/main.rs +0 -0
  109. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/mode.rs +0 -0
  110. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/parsers/config_parser.rs +0 -0
  111. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/parsers/generic.rs +0 -0
  112. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/parsers/markdown.rs +0 -0
  113. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/parsers/mod.rs +0 -0
  114. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/parsers/tree_sitter_strategy.rs +0 -0
  115. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/ppr.rs +0 -0
  116. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/project_graph.rs +0 -0
  117. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/scoring.rs +0 -0
  118. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/select.rs +0 -0
  119. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/stopwords.rs +0 -0
  120. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/test_harness.rs +0 -0
  121. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/types.rs +0 -0
  122. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/utility/boltzmann.rs +0 -0
  123. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/utility/importance.rs +0 -0
  124. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/utility/mod.rs +0 -0
  125. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/utility/needs.rs +0 -0
  126. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/src/utility/scoring.rs +0 -0
  127. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/common/mod.rs +0 -0
  128. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_api.py +0 -0
  129. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_constants.py +0 -0
  130. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_handlers.py +0 -0
  131. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_models.py +0 -0
  132. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_module.js +0 -0
  133. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_services.py +0 -0
  134. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_types.py +0 -0
  135. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_unrelated.yaml +0 -0
  136. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_utils.py +0 -0
  137. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/fixtures/garbage/garbage_validators.py +0 -0
  138. {diffctx-1.8.1 → diffctx-1.9.1}/diffctx/tests/yaml_cases.rs +0 -0
  139. {diffctx-1.8.1 → diffctx-1.9.1}/rust-toolchain.toml +0 -0
  140. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/__init__.py +0 -0
  141. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/__main__.py +0 -0
  142. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/cli.py +0 -0
  143. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/clipboard.py +0 -0
  144. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/diffctx/__init__.py +0 -0
  145. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/diffctx/graph_analytics.py +0 -0
  146. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/diffctx/graph_export.py +0 -0
  147. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/diffctx/pipeline.py +0 -0
  148. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/diffctx/project_graph.py +0 -0
  149. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/ignore.py +0 -0
  150. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/logger.py +0 -0
  151. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/main.py +0 -0
  152. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/mcp/README.md +0 -0
  153. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/mcp/__init__.py +0 -0
  154. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/mcp/__main__.py +0 -0
  155. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/mcp/formatting.py +0 -0
  156. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/mcp/security.py +0 -0
  157. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/mcp/server.py +0 -0
  158. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/py.typed +0 -0
  159. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/tokens.py +0 -0
  160. {diffctx-1.8.1 → diffctx-1.9.1}/src/diffctx/tree.py +0 -0
@@ -7,6 +7,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [1.9.0] - 2026-06-14
11
+
12
+ ### Added
13
+
14
+ - Diff-context output now leads with an orientation header — `commit_message`
15
+ and `changed_files` — in every format (YAML/JSON/Markdown/text), so a reader
16
+ sees *what* changed before reading any fragment.
17
+ - Each fragment carries a `role` of `changed` when it overlaps the diff hunks
18
+ (omitted for supporting context). Changed code is emitted first; context
19
+ follows, ordered by descending per-file relevance instead of alphabetically.
20
+
21
+ ### Changed
22
+
23
+ - Line-contiguous fragments of the same role within a file are merged into a
24
+ single entry, cutting the per-fragment scaffolding that dominated output made
25
+ up of one-line snippets (lossless on line coverage).
26
+
27
+ ### Fixed
28
+
29
+ - `get_changed_files` and `get_untracked_files` canonicalize paths consistently
30
+ with deleted/discovered files, preventing duplicate fragments when a tracked
31
+ path traverses a symlinked directory.
32
+ - Signature extraction no longer terminates at braces inside parameter defaults
33
+ or annotations (e.g. Python `def f(x={}):`), which previously truncated the
34
+ signature mid-parameter-list.
35
+ - The post-pass that rescues unrepresented changed files reuses the open
36
+ `git cat-file --batch` reader instead of spawning a `git show` per file.
37
+ - Degraded token-count fallback returns 0 for the empty string (was 1).
38
+
10
39
  ## [1.8.0]
11
40
 
12
41
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffctx
3
- Version: 1.8.1
3
+ Version: 1.9.1
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -143,9 +143,8 @@ fn find_hub_noise_paths(graph: &Graph, changed_paths: &FxHashSet<Arc<str>>) -> F
143
143
 
144
144
  noise_counts
145
145
  .into_iter()
146
- .filter(|(p, count)| {
147
- *count >= 1
148
- && !direct_edge_paths.contains(p)
146
+ .filter(|(p, _count)| {
147
+ !direct_edge_paths.contains(p)
149
148
  && !changed_dirs.contains(
150
149
  &Path::new(p.as_ref())
151
150
  .parent()
@@ -376,7 +376,15 @@ pub fn get_changed_files(repo_root: &Path, diff_range: Option<&str>) -> Result<V
376
376
  args.push(range);
377
377
  }
378
378
  let parts = run_git_z(repo_root, &args)?;
379
- Ok(parts.iter().map(|p| repo_root.join(p)).collect())
379
+ Ok(parts
380
+ .iter()
381
+ .map(|p| {
382
+ repo_root
383
+ .join(p)
384
+ .canonicalize()
385
+ .unwrap_or_else(|_| repo_root.join(p))
386
+ })
387
+ .collect())
380
388
  }
381
389
 
382
390
  pub fn get_deleted_files(repo_root: &Path, diff_range: Option<&str>) -> Result<FxHashSet<PathBuf>> {
@@ -481,7 +489,15 @@ pub fn get_untracked_files(repo_root: &Path) -> Result<Vec<PathBuf>> {
481
489
  repo_root,
482
490
  &["ls-files", "--others", "--exclude-standard", "-z"],
483
491
  )?;
484
- Ok(parts.iter().map(|p| repo_root.join(p)).collect())
492
+ Ok(parts
493
+ .iter()
494
+ .map(|p| {
495
+ repo_root
496
+ .join(p)
497
+ .canonicalize()
498
+ .unwrap_or_else(|_| repo_root.join(p))
499
+ })
500
+ .collect())
485
501
  }
486
502
 
487
503
  pub struct CatFileBatch {
@@ -45,10 +45,12 @@ impl IntervalIndex {
45
45
  continue;
46
46
  }
47
47
  // Strict `>`: a fragment starting on the very last line of an
48
- // already-selected fragment is adjacent, not overlapping. Compact
49
- // languages (Rust/Go/Scala one-liners, Lisp `}{` chains) routinely
50
- // produce back-to-back fragments sharing exactly that boundary
51
- // line; treating it as overlap silently drops the next fragment.
48
+ // already-selected fragment shares exactly one boundary line. We
49
+ // deliberately tolerate that one-line overlap rather than drop the
50
+ // candidate, because compact languages (Rust/Go/Scala one-liners,
51
+ // Lisp `}{` chains) routinely produce back-to-back fragments sharing
52
+ // that boundary line; rejecting them would silently discard the
53
+ // next fragment's unique content for the sake of one duplicated line.
52
54
  if end > frag.start_line() {
53
55
  return true;
54
56
  }
@@ -165,7 +165,20 @@ pub fn build_diff_context_in_memory(
165
165
  );
166
166
 
167
167
  let dummy_root = Path::new(".");
168
- build_diff_context_output(dummy_root, &selected, no_content)
168
+ let mut changed_list: Vec<String> = changed_paths.iter().cloned().collect();
169
+ changed_list.sort();
170
+ let change = crate::render::ChangeSummary {
171
+ commit_message: None,
172
+ changed_files: changed_list,
173
+ };
174
+ build_diff_context_output(
175
+ dummy_root,
176
+ &selected,
177
+ no_content,
178
+ &core_ids,
179
+ &scoring_result.rel_scores,
180
+ change,
181
+ )
169
182
  }
170
183
 
171
184
  fn compute_memory_hunks(
@@ -323,6 +336,8 @@ fn empty_output(name: &str) -> DiffContextOutput {
323
336
  DiffContextOutput {
324
337
  name: name.to_string(),
325
338
  output_type: "diff_context".to_string(),
339
+ commit_message: None,
340
+ changed_files: Vec::new(),
326
341
  fragment_count: 0,
327
342
  fragments: Vec::new(),
328
343
  latency: None,
@@ -42,6 +42,7 @@ pub struct ScoredState {
42
42
  pub needs: Vec<InformationNeed>,
43
43
  pub changed_files: Vec<PathBuf>,
44
44
  pub preferred_revs: Vec<String>,
45
+ pub commit_message: Option<String>,
45
46
  pub heavy_latency_ms: HeavyLatencyMs,
46
47
  }
47
48
 
@@ -157,6 +158,15 @@ pub fn compute_scored_state(
157
158
  .map(git::split_diff_range)
158
159
  .unwrap_or((None, None));
159
160
  let preferred_revs = build_preferred_revs(base_rev.as_deref(), head_rev.as_deref());
161
+ let commit_message = head_rev
162
+ .as_deref()
163
+ .and_then(|h| git::get_commit_message(&root_dir, h).ok())
164
+ .and_then(|m| {
165
+ m.lines()
166
+ .map(str::trim)
167
+ .find(|l| !l.is_empty())
168
+ .map(str::to_string)
169
+ });
160
170
 
161
171
  let t0 = Instant::now();
162
172
 
@@ -303,6 +313,7 @@ pub fn compute_scored_state(
303
313
  needs,
304
314
  changed_files,
305
315
  preferred_revs,
316
+ commit_message,
306
317
  heavy_latency_ms,
307
318
  })
308
319
  }
@@ -412,7 +423,27 @@ pub fn select_with_params(
412
423
  + select_ms;
413
424
 
414
425
  let cap_stats = state.scoring_result.graph.cap_stats;
415
- let mut output = render::build_diff_context_output(&state.root_dir, &selected, no_content);
426
+ let change = render::ChangeSummary {
427
+ commit_message: state.commit_message.clone(),
428
+ changed_files: state
429
+ .changed_files
430
+ .iter()
431
+ .map(|p| {
432
+ p.strip_prefix(&state.root_dir)
433
+ .unwrap_or(p)
434
+ .to_string_lossy()
435
+ .replace('\\', "/")
436
+ })
437
+ .collect(),
438
+ };
439
+ let mut output = render::build_diff_context_output(
440
+ &state.root_dir,
441
+ &selected,
442
+ no_content,
443
+ &state.core_ids,
444
+ &state.scoring_result.rel_scores,
445
+ change,
446
+ );
416
447
  output.latency = Some(render::LatencyBreakdown {
417
448
  parse_changed_ms: state.heavy_latency_ms.parse_changed,
418
449
  universe_walk_ms: state.heavy_latency_ms.universe_walk,
@@ -479,10 +510,37 @@ fn build_diff_context_full(
479
510
  assign_token_counts(&mut sig_frags);
480
511
  all_fragments.extend(sig_frags);
481
512
  changed_files.sort();
513
+ let core_ids = identify_core_fragments(&hunks, &all_fragments);
482
514
  let selected = select_full_mode(&all_fragments, &changed_files);
483
515
  batch_reader.close();
516
+ let commit_message = head_rev
517
+ .as_deref()
518
+ .and_then(|h| git::get_commit_message(&root_dir, h).ok())
519
+ .and_then(|m| {
520
+ m.lines()
521
+ .map(str::trim)
522
+ .find(|l| !l.is_empty())
523
+ .map(str::to_string)
524
+ });
525
+ let change = render::ChangeSummary {
526
+ commit_message,
527
+ changed_files: changed_files
528
+ .iter()
529
+ .map(|p| {
530
+ p.strip_prefix(&root_dir)
531
+ .unwrap_or(p)
532
+ .to_string_lossy()
533
+ .replace('\\', "/")
534
+ })
535
+ .collect(),
536
+ };
484
537
  Ok(render::build_diff_context_output(
485
- &root_dir, &selected, no_content,
538
+ &root_dir,
539
+ &selected,
540
+ no_content,
541
+ &core_ids,
542
+ &FxHashMap::default(),
543
+ change,
486
544
  ))
487
545
  }
488
546
 
@@ -504,6 +562,7 @@ fn empty_scored_state(root_dir: PathBuf) -> ScoredState {
504
562
  needs: Vec::new(),
505
563
  changed_files: Vec::new(),
506
564
  preferred_revs: Vec::new(),
565
+ commit_message: None,
507
566
  heavy_latency_ms: HeavyLatencyMs::default(),
508
567
  }
509
568
  }
@@ -519,6 +578,8 @@ fn empty_output(root_dir: &Path) -> DiffContextOutput {
519
578
  DiffContextOutput {
520
579
  name,
521
580
  output_type: "diff_context".to_string(),
581
+ commit_message: None,
582
+ changed_files: Vec::new(),
522
583
  fragment_count: 0,
523
584
  fragments: Vec::new(),
524
585
  latency: None,
@@ -207,7 +207,7 @@ pub fn ensure_changed_files_represented(
207
207
  remaining_budget: u32,
208
208
  root_dir: &Path,
209
209
  preferred_revs: &[String],
210
- _batch_reader: Option<&mut CatFileBatch>,
210
+ mut batch_reader: Option<&mut CatFileBatch>,
211
211
  ) {
212
212
  let selected_paths: FxHashSet<String> = selected
213
213
  .iter()
@@ -245,7 +245,12 @@ pub fn ensure_changed_files_represented(
245
245
  let path_str = path.to_string_lossy().to_string();
246
246
  let candidates = frags_by_path.get(&path_str).cloned().unwrap_or_default();
247
247
  let candidates = if candidates.is_empty() {
248
- match create_whole_file_fragment(path, root_dir, preferred_revs, None) {
248
+ match create_whole_file_fragment(
249
+ path,
250
+ root_dir,
251
+ preferred_revs,
252
+ batch_reader.as_deref_mut(),
253
+ ) {
249
254
  Some(f) => vec![f],
250
255
  None => continue,
251
256
  }
@@ -156,6 +156,8 @@ impl DiffContextResult {
156
156
  DiffContextOutput {
157
157
  name: self.name.clone(),
158
158
  output_type: "diff_context".to_string(),
159
+ commit_message: None,
160
+ changed_files: Vec::new(),
159
161
  fragment_count: self.fragment_count,
160
162
  fragments: self
161
163
  .fragments
@@ -163,6 +165,7 @@ impl DiffContextResult {
163
165
  .map(|f| FragmentEntry {
164
166
  path: f.path.clone(),
165
167
  lines: f.lines.clone(),
168
+ role: None,
166
169
  kind: f.kind.clone(),
167
170
  symbol: f.symbol.clone(),
168
171
  content: f.content.clone(),
@@ -277,6 +280,12 @@ fn build_diff_context<'py>(
277
280
  let dict = PyDict::new(py);
278
281
  dict.set_item("name", &output.name)?;
279
282
  dict.set_item("type", "diff_context")?;
283
+ if let Some(ref msg) = output.commit_message {
284
+ dict.set_item("commit_message", msg)?;
285
+ }
286
+ if !output.changed_files.is_empty() {
287
+ dict.set_item("changed_files", &output.changed_files)?;
288
+ }
280
289
  dict.set_item("fragment_count", output.fragment_count)?;
281
290
 
282
291
  let frag_list = PyList::empty(py);
@@ -284,6 +293,9 @@ fn build_diff_context<'py>(
284
293
  let frag_dict = PyDict::new(py);
285
294
  frag_dict.set_item("path", &entry.path)?;
286
295
  frag_dict.set_item("lines", &entry.lines)?;
296
+ if let Some(ref role) = entry.role {
297
+ frag_dict.set_item("role", role)?;
298
+ }
287
299
  frag_dict.set_item("kind", &entry.kind)?;
288
300
  if let Some(ref s) = entry.symbol {
289
301
  frag_dict.set_item("symbol", s)?;
@@ -381,6 +393,8 @@ fn select_with_params<'py>(
381
393
  .map(|n| n.to_string_lossy().to_string())
382
394
  .unwrap_or_else(|| inner.root_dir.to_string_lossy().to_string()),
383
395
  output_type: "diff_context".to_string(),
396
+ commit_message: None,
397
+ changed_files: Vec::new(),
384
398
  fragment_count: 0,
385
399
  fragments: Vec::new(),
386
400
  latency: None,
@@ -398,6 +412,12 @@ fn diff_context_output_to_dict<'py>(
398
412
  let dict = PyDict::new(py);
399
413
  dict.set_item("name", &output.name)?;
400
414
  dict.set_item("type", "diff_context")?;
415
+ if let Some(ref msg) = output.commit_message {
416
+ dict.set_item("commit_message", msg)?;
417
+ }
418
+ if !output.changed_files.is_empty() {
419
+ dict.set_item("changed_files", &output.changed_files)?;
420
+ }
401
421
  dict.set_item("fragment_count", output.fragment_count)?;
402
422
 
403
423
  let frag_list = PyList::empty(py);
@@ -405,6 +425,9 @@ fn diff_context_output_to_dict<'py>(
405
425
  let frag_dict = PyDict::new(py);
406
426
  frag_dict.set_item("path", &entry.path)?;
407
427
  frag_dict.set_item("lines", &entry.lines)?;
428
+ if let Some(ref role) = entry.role {
429
+ frag_dict.set_item("role", role)?;
430
+ }
408
431
  frag_dict.set_item("kind", &entry.kind)?;
409
432
  if let Some(ref s) = entry.symbol {
410
433
  frag_dict.set_item("symbol", s)?;
@@ -1,19 +1,32 @@
1
- use std::collections::BTreeMap;
2
1
  use std::path::Path;
3
2
  use std::sync::Arc;
4
3
 
5
4
  use once_cell::sync::Lazy;
6
5
  use regex::Regex;
6
+ use rustc_hash::{FxHashMap, FxHashSet};
7
7
  use serde::Serialize;
8
8
 
9
9
  use crate::config::render::RENDER;
10
- use crate::types::{Fragment, FragmentKind};
10
+ use crate::types::{Fragment, FragmentId, FragmentKind};
11
+
12
+ /// Orientation header for the diff-context output: tells the reader *what*
13
+ /// changed before they read the fragments. Empty for working-tree / no-commit
14
+ /// diffs.
15
+ #[derive(Default)]
16
+ pub struct ChangeSummary {
17
+ pub commit_message: Option<String>,
18
+ pub changed_files: Vec<String>,
19
+ }
11
20
 
12
21
  #[derive(Serialize)]
13
22
  pub struct DiffContextOutput {
14
23
  pub name: String,
15
24
  #[serde(rename = "type")]
16
25
  pub output_type: String,
26
+ #[serde(skip_serializing_if = "Option::is_none")]
27
+ pub commit_message: Option<String>,
28
+ #[serde(skip_serializing_if = "Vec::is_empty")]
29
+ pub changed_files: Vec<String>,
17
30
  pub fragment_count: usize,
18
31
  pub fragments: Vec<FragmentEntry>,
19
32
  #[serde(skip)]
@@ -71,6 +84,11 @@ pub struct LatencyBreakdown {
71
84
  pub struct FragmentEntry {
72
85
  pub path: String,
73
86
  pub lines: String,
87
+ /// `Some("changed")` for fragments overlapping the diff hunks; omitted
88
+ /// (treated as supporting context) otherwise. This is the single signal a
89
+ /// reader needs to tell the change apart from its context.
90
+ #[serde(skip_serializing_if = "Option::is_none")]
91
+ pub role: Option<String>,
74
92
  pub kind: String,
75
93
  #[serde(skip_serializing_if = "Option::is_none")]
76
94
  pub symbol: Option<String>,
@@ -189,36 +207,111 @@ fn create_fragment_entry(frag: &Fragment, path_str: &str) -> FragmentEntry {
189
207
  FragmentEntry {
190
208
  path: path_str.to_string(),
191
209
  lines: format!("{}-{}", frag.start_line(), frag.end_line()),
210
+ role: None,
192
211
  kind: frag.kind.as_str().to_string(),
193
212
  symbol,
194
213
  content,
195
214
  }
196
215
  }
197
216
 
217
+ /// Collapse a file's fragments (sorted by start line) into the rendered
218
+ /// entries, merging runs of same-role, line-contiguous fragments
219
+ /// (`next.start == cur.end + 1`) into one. Merging touching ranges is lossless
220
+ /// on line coverage and removes the per-fragment scaffolding tax that dominates
221
+ /// output made up of one-line snippets.
222
+ fn merge_file_fragments(
223
+ rel_path: &str,
224
+ frags: &[&Fragment],
225
+ core_ids: &FxHashSet<FragmentId>,
226
+ ) -> Vec<(bool, u32, FragmentEntry)> {
227
+ let mut out: Vec<(bool, u32, FragmentEntry)> = Vec::new();
228
+ let mut i = 0;
229
+ while i < frags.len() {
230
+ let first = frags[i];
231
+ let role_changed = core_ids.contains(&first.id);
232
+ let mut end = first.end_line();
233
+ let mut parts: Vec<&str> = vec![first.content.trim_end_matches('\n')];
234
+ let mut j = i + 1;
235
+ while j < frags.len() {
236
+ let next = frags[j];
237
+ if core_ids.contains(&next.id) == role_changed && next.start_line() == end + 1 {
238
+ parts.push(next.content.trim_end_matches('\n'));
239
+ end = next.end_line();
240
+ j += 1;
241
+ } else {
242
+ break;
243
+ }
244
+ }
245
+
246
+ let mut entry = create_fragment_entry(first, rel_path);
247
+ if j > i + 1 {
248
+ entry.lines = format!("{}-{}", first.start_line(), end);
249
+ let merged = parts.join("\n");
250
+ entry.content = if merged.is_empty() {
251
+ None
252
+ } else {
253
+ Some(Arc::from(merged.as_str()))
254
+ };
255
+ }
256
+ entry.role = role_changed.then(|| "changed".to_string());
257
+ out.push((role_changed, first.start_line(), entry));
258
+ i = j;
259
+ }
260
+ out
261
+ }
262
+
198
263
  pub fn build_diff_context_output(
199
264
  repo_root: &Path,
200
265
  selected: &[Fragment],
201
266
  no_content: bool,
267
+ core_ids: &FxHashSet<FragmentId>,
268
+ rel_scores: &FxHashMap<FragmentId, f64>,
269
+ change: ChangeSummary,
202
270
  ) -> DiffContextOutput {
203
- let mut by_path: BTreeMap<String, Vec<&Fragment>> = BTreeMap::new();
271
+ let mut by_path: FxHashMap<String, Vec<&Fragment>> = FxHashMap::default();
204
272
  for frag in selected {
205
- let rel_path = get_relative_path(frag, repo_root);
206
- by_path.entry(rel_path).or_default().push(frag);
273
+ by_path
274
+ .entry(get_relative_path(frag, repo_root))
275
+ .or_default()
276
+ .push(frag);
207
277
  }
208
278
 
209
- let mut fragments_out: Vec<FragmentEntry> = Vec::new();
279
+ // Changed code first (the answer to "what changed"), then supporting
280
+ // context ordered by descending per-file relevance so the reader's primacy
281
+ // attention lands on the most relevant material, not on alphabetical noise.
282
+ let mut changed: Vec<(String, u32, FragmentEntry)> = Vec::new();
283
+ let mut context: Vec<(f64, String, u32, FragmentEntry)> = Vec::new();
210
284
  for (rel_path, frags) in &by_path {
211
- let mut sorted_frags: Vec<&&Fragment> = frags.iter().collect();
212
- sorted_frags.sort_by_key(|f| f.start_line());
213
- for frag in sorted_frags {
214
- let mut entry = create_fragment_entry(frag, rel_path);
285
+ let mut sorted: Vec<&Fragment> = frags.clone();
286
+ sorted.sort_by_key(|f| f.start_line());
287
+ let file_rel = sorted
288
+ .iter()
289
+ .map(|f| rel_scores.get(&f.id).copied().unwrap_or(0.0))
290
+ .fold(0.0_f64, f64::max);
291
+ for (role_changed, start, mut entry) in merge_file_fragments(rel_path, &sorted, core_ids) {
215
292
  if no_content {
216
293
  entry.content = None;
217
294
  }
218
- fragments_out.push(entry);
295
+ if role_changed {
296
+ changed.push((rel_path.clone(), start, entry));
297
+ } else {
298
+ context.push((file_rel, rel_path.clone(), start, entry));
299
+ }
219
300
  }
220
301
  }
221
302
 
303
+ changed.sort_by(|a, b| a.0.cmp(&b.0).then(a.1.cmp(&b.1)));
304
+ context.sort_by(|a, b| {
305
+ b.0.partial_cmp(&a.0)
306
+ .unwrap_or(std::cmp::Ordering::Equal)
307
+ .then(a.1.cmp(&b.1))
308
+ .then(a.2.cmp(&b.2))
309
+ });
310
+
311
+ let mut fragments_out: Vec<FragmentEntry> = Vec::with_capacity(changed.len() + context.len());
312
+ fragments_out.extend(changed.into_iter().map(|(_, _, e)| e));
313
+ fragments_out.extend(context.into_iter().map(|(_, _, _, e)| e));
314
+
222
315
  let resolved = repo_root
223
316
  .canonicalize()
224
317
  .unwrap_or_else(|_| repo_root.to_path_buf());
@@ -230,6 +323,8 @@ pub fn build_diff_context_output(
230
323
  DiffContextOutput {
231
324
  name,
232
325
  output_type: "diff_context".to_string(),
326
+ commit_message: change.commit_message,
327
+ changed_files: change.changed_files,
233
328
  fragment_count: fragments_out.len(),
234
329
  fragments: fragments_out,
235
330
  latency: None,
@@ -72,7 +72,11 @@ fn find_signature_end(lines: &[&str]) -> usize {
72
72
  if op > 0 {
73
73
  seen_open_paren = true;
74
74
  }
75
- if ob - cb > 0 {
75
+ // A body-opening brace only ends the signature once we are outside the
76
+ // parameter list. Braces inside parameter defaults or annotations
77
+ // (e.g. Python `def f(x={}):`) appear while `paren_depth > 0` and must
78
+ // not truncate the signature mid-parameter-list.
79
+ if paren_depth <= 0 && ob - cb > 0 {
76
80
  return i + 1;
77
81
  }
78
82
  if seen_open_paren && paren_depth <= 0 {
@@ -40,6 +40,7 @@ pub fn count_tokens(text: &str) -> u32 {
40
40
  // aborting the host process.
41
41
  match try_count_tokens(text) {
42
42
  Ok(n) => n,
43
+ Err(_) if text.is_empty() => 0,
43
44
  Err(_) => ((text.len() as u32) / 4).max(1),
44
45
  }
45
46
  }
@@ -5,7 +5,7 @@ requires = [ "maturin>=1.10,<1.14" ]
5
5
 
6
6
  [project]
7
7
  name = "diffctx"
8
- version = "1.8.1"
8
+ version = "1.9.1"
9
9
  description = "Export codebase structure and contents for AI/LLM context"
10
10
  readme = "README.md"
11
11
  keywords = [
@@ -0,0 +1 @@
1
+ __version__ = "1.9.1"
@@ -98,6 +98,8 @@ def _write_yaml_node(file: TextIO, node: dict[str, Any], indent: str = "") -> No
98
98
  def _write_yaml_fragment(file: TextIO, frag: dict[str, Any], indent: str = "") -> None:
99
99
  file.write(f'{indent}- path: "{_escape_yaml_string(frag.get("path", ""))}"\n')
100
100
  file.write(f'{indent} lines: "{_escape_yaml_string(frag.get("lines", ""))}"\n')
101
+ if frag.get("role"):
102
+ file.write(f'{indent} role: "{_escape_yaml_string(frag["role"])}"\n')
101
103
  file.write(f'{indent} kind: "{_escape_yaml_string(frag.get("kind", "unknown"))}"\n')
102
104
 
103
105
  if frag.get("symbol"):
@@ -113,6 +115,12 @@ def write_tree_yaml(file: TextIO, tree: dict[str, Any]) -> None:
113
115
  file.write(f"type: {tree['type']}\n")
114
116
 
115
117
  if tree.get("type") == "diff_context" and tree.get("fragments"):
118
+ if tree.get("commit_message"):
119
+ file.write(f'commit_message: "{_escape_yaml_string(str(tree["commit_message"]))}"\n')
120
+ if tree.get("changed_files"):
121
+ file.write("changed_files:\n")
122
+ for path in tree["changed_files"]:
123
+ file.write(f' - "{_escape_yaml_string(str(path))}"\n')
116
124
  file.write(f"fragment_count: {len(tree['fragments'])}\n")
117
125
  file.write("fragments:\n")
118
126
  for frag in tree["fragments"]:
@@ -171,6 +179,8 @@ def _write_text_fragment(file: TextIO, frag: dict[str, Any], indent: str = "") -
171
179
  header += f" ({symbol})"
172
180
  if kind:
173
181
  header += f" [{kind}]"
182
+ if frag.get("role"):
183
+ header += f" <{frag['role']}>"
174
184
  file.write(f"{indent}{header}\n")
175
185
 
176
186
  if frag.get("content"):
@@ -192,6 +202,10 @@ def write_tree_text(file: TextIO, tree: dict[str, Any]) -> None:
192
202
  file.write(f"{name}/\n")
193
203
 
194
204
  if tree_type == "diff_context" and tree.get("fragments"):
205
+ if tree.get("commit_message"):
206
+ file.write(f" change: {tree['commit_message']}\n")
207
+ if tree.get("changed_files"):
208
+ file.write(f" changed files: {', '.join(str(p) for p in tree['changed_files'])}\n")
195
209
  for frag in tree["fragments"]:
196
210
  _write_text_fragment(file, frag, " ")
197
211
  elif tree.get("children"):
@@ -301,6 +315,8 @@ def _write_markdown_fragment(file: TextIO, frag: dict[str, Any]) -> None:
301
315
  header += f" **{_escape_markdown(symbol)}**"
302
316
  if kind:
303
317
  header += f" _{_escape_markdown(kind)}_"
318
+ if frag.get("role") == "changed":
319
+ header = f"🔹 {header} — **changed**"
304
320
  file.write(f"## {header}\n\n")
305
321
 
306
322
  if frag.get("content"):
@@ -319,6 +335,13 @@ def write_tree_markdown(file: TextIO, tree: dict[str, Any]) -> None:
319
335
  file.write(f"# {name}/\n\n")
320
336
 
321
337
  if tree_type == "diff_context" and tree.get("fragments"):
338
+ if tree.get("commit_message"):
339
+ file.write(f"> {tree['commit_message']}\n\n")
340
+ if tree.get("changed_files"):
341
+ file.write("**Changed files:**\n\n")
342
+ for path in tree["changed_files"]:
343
+ file.write(f"- {_escape_md_inline_code(str(path))}\n")
344
+ file.write("\n")
322
345
  for frag in tree["fragments"]:
323
346
  _write_markdown_fragment(file, frag)
324
347
  elif tree.get("children"):
@@ -1 +0,0 @@
1
- __version__ = "1.8.1"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes