rugged 0.18.0.gh.de28323 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (283) hide show
  1. data/README.md +9 -4
  2. data/Rakefile +1 -1
  3. data/ext/rugged/extconf.rb +10 -0
  4. data/ext/rugged/rugged.c +153 -86
  5. data/ext/rugged/rugged.h +44 -33
  6. data/ext/rugged/rugged_blob.c +288 -60
  7. data/ext/rugged/rugged_branch.c +82 -57
  8. data/ext/rugged/rugged_commit.c +83 -86
  9. data/ext/rugged/rugged_config.c +68 -68
  10. data/ext/rugged/rugged_diff.c +509 -0
  11. data/ext/rugged/rugged_diff_delta.c +94 -0
  12. data/ext/rugged/rugged_diff_hunk.c +100 -0
  13. data/ext/rugged/rugged_diff_line.c +79 -0
  14. data/ext/rugged/rugged_diff_patch.c +169 -0
  15. data/ext/rugged/rugged_index.c +539 -8
  16. data/ext/rugged/rugged_note.c +74 -80
  17. data/ext/rugged/rugged_object.c +63 -8
  18. data/ext/rugged/rugged_reference.c +231 -145
  19. data/ext/rugged/rugged_remote.c +509 -53
  20. data/ext/rugged/rugged_repo.c +572 -236
  21. data/ext/rugged/rugged_revwalk.c +59 -36
  22. data/ext/rugged/rugged_settings.c +7 -9
  23. data/ext/rugged/rugged_signature.c +7 -11
  24. data/ext/rugged/rugged_tag.c +93 -39
  25. data/ext/rugged/rugged_tree.c +321 -58
  26. data/lib/rugged.rb +1 -0
  27. data/lib/rugged/commit.rb +16 -1
  28. data/lib/rugged/console.rb +9 -0
  29. data/lib/rugged/diff.rb +19 -0
  30. data/lib/rugged/diff/delta.rb +54 -0
  31. data/lib/rugged/diff/hunk.rb +23 -0
  32. data/lib/rugged/diff/line.rb +29 -0
  33. data/lib/rugged/diff/patch.rb +28 -0
  34. data/lib/rugged/repository.rb +36 -39
  35. data/lib/rugged/version.rb +1 -1
  36. data/test/blob_test.rb +308 -1
  37. data/test/branch_test.rb +7 -0
  38. data/test/commit_test.rb +7 -10
  39. data/test/coverage/cover.rb +9 -1
  40. data/test/diff_test.rb +777 -0
  41. data/test/fixtures/archive.tar.gz +0 -0
  42. data/test/fixtures/attr/attr0 +1 -0
  43. data/test/fixtures/attr/attr1 +29 -0
  44. data/test/fixtures/attr/attr2 +21 -0
  45. data/test/fixtures/attr/attr3 +4 -0
  46. data/test/fixtures/attr/binfile +1 -0
  47. data/test/fixtures/attr/dir/file +0 -0
  48. data/test/fixtures/attr/file +1 -0
  49. data/test/fixtures/attr/gitattributes +29 -0
  50. data/test/fixtures/attr/gitignore +2 -0
  51. data/test/fixtures/attr/ign +1 -0
  52. data/test/fixtures/attr/macro_bad +1 -0
  53. data/test/fixtures/attr/macro_test +1 -0
  54. data/test/fixtures/attr/root_test1 +1 -0
  55. data/test/fixtures/attr/root_test2 +6 -0
  56. data/test/fixtures/attr/root_test3 +19 -0
  57. data/test/fixtures/attr/root_test4.txt +14 -0
  58. data/test/fixtures/attr/sub/abc +37 -0
  59. data/test/fixtures/attr/sub/dir/file +0 -0
  60. data/test/fixtures/attr/sub/file +1 -0
  61. data/test/fixtures/attr/sub/ign/file +1 -0
  62. data/test/fixtures/attr/sub/ign/sub/file +1 -0
  63. data/test/fixtures/attr/sub/sub/dir +0 -0
  64. data/test/fixtures/attr/sub/sub/file +1 -0
  65. data/test/fixtures/attr/sub/sub/subsub.txt +1 -0
  66. data/test/fixtures/attr/sub/subdir_test1 +2 -0
  67. data/test/fixtures/attr/sub/subdir_test2.txt +1 -0
  68. data/test/fixtures/diff/another.txt +38 -0
  69. data/test/fixtures/diff/readme.txt +36 -0
  70. data/test/fixtures/mergedrepo/conflicts-one.txt +5 -0
  71. data/test/fixtures/mergedrepo/conflicts-two.txt +5 -0
  72. data/test/fixtures/mergedrepo/one.txt +10 -0
  73. data/test/fixtures/mergedrepo/two.txt +12 -0
  74. data/test/fixtures/status/current_file +1 -0
  75. data/test/fixtures/status/ignored_file +1 -0
  76. data/test/fixtures/status/modified_file +2 -0
  77. data/test/fixtures/status/new_file +1 -0
  78. data/test/fixtures/status/staged_changes +2 -0
  79. data/test/fixtures/status/staged_changes_modified_file +3 -0
  80. data/test/fixtures/status/staged_delete_modified_file +1 -0
  81. data/test/fixtures/status/staged_new_file +1 -0
  82. data/test/fixtures/status/staged_new_file_modified_file +2 -0
  83. data/test/fixtures/status/subdir.txt +2 -0
  84. data/test/fixtures/status/subdir/current_file +1 -0
  85. data/test/fixtures/status/subdir/modified_file +2 -0
  86. data/test/fixtures/status/subdir/new_file +1 -0
  87. data/test/fixtures/status/这 +1 -0
  88. data/test/fixtures/testrepo.git/config +5 -0
  89. data/test/fixtures/testrepo.git/objects/77/71329dfa3002caf8c61a0ceb62a31d09023f37 +0 -0
  90. data/test/fixtures/text_file.md +464 -0
  91. data/test/fixtures/unsymlinked.git/HEAD +1 -0
  92. data/test/fixtures/unsymlinked.git/config +6 -0
  93. data/test/fixtures/unsymlinked.git/description +1 -0
  94. data/test/fixtures/unsymlinked.git/info/exclude +2 -0
  95. data/test/fixtures/unsymlinked.git/objects/08/8b64704e0d6b8bd061dea879418cb5442a3fbf +0 -0
  96. data/test/fixtures/unsymlinked.git/objects/13/a5e939bca25940c069fd2169d993dba328e30b +0 -0
  97. data/test/fixtures/unsymlinked.git/objects/19/bf568e59e3a0b363cafb4106226e62d4a4c41c +0 -0
  98. data/test/fixtures/unsymlinked.git/objects/58/1fadd35b4cf320d102a152f918729011604773 +0 -0
  99. data/test/fixtures/unsymlinked.git/objects/5c/87b6791e8b13da658a14d1ef7e09b5dc3bac8c +0 -0
  100. data/test/fixtures/unsymlinked.git/objects/6f/e5f5398af85fb3de8a6aba0339b6d3bfa26a27 +0 -0
  101. data/test/fixtures/unsymlinked.git/objects/7f/ccd75616ec188b8f1b23d67506a334cc34a49d +0 -0
  102. data/test/fixtures/unsymlinked.git/objects/80/6999882bf91d24241e4077906b9017605eb1f3 +0 -0
  103. data/test/fixtures/unsymlinked.git/objects/83/7d176303c5005505ec1e4a30231c40930c0230 +0 -0
  104. data/test/fixtures/unsymlinked.git/objects/a8/595ccca04f40818ae0155c8f9c77a230e597b6 +2 -0
  105. data/test/fixtures/unsymlinked.git/objects/cf/8f1cf5cce859c438d6cc067284cb5e161206e7 +0 -0
  106. data/test/fixtures/unsymlinked.git/objects/d5/278d05c8607ec420bfee4cf219fbc0eeebfd6a +0 -0
  107. data/test/fixtures/unsymlinked.git/objects/f4/e16fb76536591a41454194058d048d8e4dd2e9 +0 -0
  108. data/test/fixtures/unsymlinked.git/objects/f9/e65619d93fdf2673882e0a261c5e93b1a84006 +0 -0
  109. data/test/fixtures/unsymlinked.git/refs/heads/exe-file +1 -0
  110. data/test/fixtures/unsymlinked.git/refs/heads/master +1 -0
  111. data/test/fixtures/unsymlinked.git/refs/heads/reg-file +1 -0
  112. data/test/index_test.rb +120 -0
  113. data/test/reference_test.rb +38 -3
  114. data/test/remote_test.rb +224 -3
  115. data/test/repo_reset_test.rb +2 -0
  116. data/test/repo_test.rb +147 -10
  117. data/test/test_helper.rb +5 -2
  118. data/vendor/libgit2/include/git2/attr.h +3 -3
  119. data/vendor/libgit2/include/git2/blob.h +11 -17
  120. data/vendor/libgit2/include/git2/branch.h +3 -2
  121. data/vendor/libgit2/include/git2/checkout.h +7 -0
  122. data/vendor/libgit2/include/git2/clone.h +3 -0
  123. data/vendor/libgit2/include/git2/commit.h +61 -66
  124. data/vendor/libgit2/include/git2/common.h +73 -42
  125. data/vendor/libgit2/include/git2/config.h +57 -71
  126. data/vendor/libgit2/include/git2/cred_helpers.h +2 -2
  127. data/vendor/libgit2/include/git2/diff.h +179 -30
  128. data/vendor/libgit2/include/git2/errors.h +3 -3
  129. data/vendor/libgit2/include/git2/index.h +225 -146
  130. data/vendor/libgit2/include/git2/indexer.h +2 -22
  131. data/vendor/libgit2/include/git2/inttypes.h +9 -9
  132. data/vendor/libgit2/include/git2/merge.h +123 -5
  133. data/vendor/libgit2/include/git2/odb.h +59 -38
  134. data/vendor/libgit2/include/git2/odb_backend.h +45 -104
  135. data/vendor/libgit2/include/git2/oid.h +30 -19
  136. data/vendor/libgit2/include/git2/pack.h +21 -3
  137. data/vendor/libgit2/include/git2/refdb.h +0 -35
  138. data/vendor/libgit2/include/git2/refs.h +93 -31
  139. data/vendor/libgit2/include/git2/refspec.h +17 -0
  140. data/vendor/libgit2/include/git2/remote.h +60 -20
  141. data/vendor/libgit2/include/git2/repository.h +48 -70
  142. data/vendor/libgit2/include/git2/reset.h +3 -3
  143. data/vendor/libgit2/include/git2/revparse.h +22 -0
  144. data/vendor/libgit2/include/git2/stash.h +1 -1
  145. data/vendor/libgit2/include/git2/status.h +131 -56
  146. data/vendor/libgit2/include/git2/strarray.h +2 -2
  147. data/vendor/libgit2/include/git2/submodule.h +16 -16
  148. data/vendor/libgit2/include/git2/sys/commit.h +46 -0
  149. data/vendor/libgit2/include/git2/sys/config.h +71 -0
  150. data/vendor/libgit2/include/git2/sys/index.h +179 -0
  151. data/vendor/libgit2/include/git2/sys/odb_backend.h +86 -0
  152. data/vendor/libgit2/include/git2/sys/refdb_backend.h +158 -0
  153. data/vendor/libgit2/include/git2/sys/refs.h +38 -0
  154. data/vendor/libgit2/include/git2/sys/repository.h +106 -0
  155. data/vendor/libgit2/include/git2/tag.h +44 -18
  156. data/vendor/libgit2/include/git2/trace.h +1 -2
  157. data/vendor/libgit2/include/git2/transport.h +74 -0
  158. data/vendor/libgit2/include/git2/tree.h +12 -22
  159. data/vendor/libgit2/include/git2/types.h +33 -0
  160. data/vendor/libgit2/include/git2/version.h +2 -2
  161. data/vendor/libgit2/src/array.h +66 -0
  162. data/vendor/libgit2/src/attr.c +26 -13
  163. data/vendor/libgit2/src/attr_file.c +3 -2
  164. data/vendor/libgit2/src/attr_file.h +3 -3
  165. data/vendor/libgit2/src/attrcache.h +4 -4
  166. data/vendor/libgit2/src/blob.c +13 -9
  167. data/vendor/libgit2/src/blob.h +2 -2
  168. data/vendor/libgit2/src/branch.c +67 -49
  169. data/vendor/libgit2/src/cache.c +224 -54
  170. data/vendor/libgit2/src/cache.h +33 -20
  171. data/vendor/libgit2/src/checkout.c +145 -85
  172. data/vendor/libgit2/src/clone.c +62 -50
  173. data/vendor/libgit2/src/commit.c +74 -40
  174. data/vendor/libgit2/src/commit.h +2 -3
  175. data/vendor/libgit2/src/commit_list.c +14 -8
  176. data/vendor/libgit2/src/config.c +119 -36
  177. data/vendor/libgit2/src/config.h +3 -0
  178. data/vendor/libgit2/src/config_cache.c +24 -7
  179. data/vendor/libgit2/src/config_file.c +9 -6
  180. data/vendor/libgit2/src/crlf.c +4 -2
  181. data/vendor/libgit2/src/date.c +3 -3
  182. data/vendor/libgit2/src/delta.c +1 -1
  183. data/vendor/libgit2/src/diff.c +681 -303
  184. data/vendor/libgit2/src/diff.h +34 -2
  185. data/vendor/libgit2/src/diff_driver.c +405 -0
  186. data/vendor/libgit2/src/diff_driver.h +49 -0
  187. data/vendor/libgit2/src/diff_file.c +447 -0
  188. data/vendor/libgit2/src/diff_file.h +58 -0
  189. data/vendor/libgit2/src/diff_patch.c +995 -0
  190. data/vendor/libgit2/src/diff_patch.h +46 -0
  191. data/vendor/libgit2/src/diff_print.c +430 -0
  192. data/vendor/libgit2/src/diff_tform.c +464 -203
  193. data/vendor/libgit2/src/diff_xdiff.c +166 -0
  194. data/vendor/libgit2/src/diff_xdiff.h +28 -0
  195. data/vendor/libgit2/src/fetch.c +11 -4
  196. data/vendor/libgit2/src/fileops.c +85 -61
  197. data/vendor/libgit2/src/fileops.h +4 -0
  198. data/vendor/libgit2/src/global.c +10 -2
  199. data/vendor/libgit2/src/global.h +0 -8
  200. data/vendor/libgit2/src/hash/hash_generic.h +3 -3
  201. data/vendor/libgit2/src/hash/hash_win32.h +4 -4
  202. data/vendor/libgit2/src/hashsig.c +0 -1
  203. data/vendor/libgit2/src/ignore.c +68 -28
  204. data/vendor/libgit2/src/ignore.h +10 -1
  205. data/vendor/libgit2/src/index.c +666 -84
  206. data/vendor/libgit2/src/index.h +6 -0
  207. data/vendor/libgit2/src/indexer.c +10 -28
  208. data/vendor/libgit2/src/iterator.c +427 -283
  209. data/vendor/libgit2/src/iterator.h +58 -4
  210. data/vendor/libgit2/src/merge.c +1892 -32
  211. data/vendor/libgit2/src/merge.h +132 -5
  212. data/vendor/libgit2/src/merge_file.c +174 -0
  213. data/vendor/libgit2/src/merge_file.h +71 -0
  214. data/vendor/libgit2/src/mwindow.c +1 -1
  215. data/vendor/libgit2/src/notes.c +45 -48
  216. data/vendor/libgit2/src/object.c +89 -127
  217. data/vendor/libgit2/src/object.h +0 -1
  218. data/vendor/libgit2/src/object_api.c +129 -0
  219. data/vendor/libgit2/src/odb.c +156 -59
  220. data/vendor/libgit2/src/odb.h +5 -2
  221. data/vendor/libgit2/src/odb_loose.c +31 -17
  222. data/vendor/libgit2/src/odb_pack.c +39 -43
  223. data/vendor/libgit2/src/oid.c +62 -27
  224. data/vendor/libgit2/src/oid.h +33 -0
  225. data/vendor/libgit2/src/oidmap.h +4 -6
  226. data/vendor/libgit2/src/pack-objects.c +54 -22
  227. data/vendor/libgit2/src/pack.c +98 -56
  228. data/vendor/libgit2/src/pack.h +3 -1
  229. data/vendor/libgit2/src/pathspec.c +26 -1
  230. data/vendor/libgit2/src/pathspec.h +14 -0
  231. data/vendor/libgit2/src/pool.c +5 -0
  232. data/vendor/libgit2/src/posix.c +2 -2
  233. data/vendor/libgit2/src/posix.h +3 -0
  234. data/vendor/libgit2/src/push.c +13 -10
  235. data/vendor/libgit2/src/refdb.c +82 -62
  236. data/vendor/libgit2/src/refdb.h +16 -16
  237. data/vendor/libgit2/src/refdb_fs.c +386 -133
  238. data/vendor/libgit2/src/reflog.c +3 -1
  239. data/vendor/libgit2/src/refs.c +247 -221
  240. data/vendor/libgit2/src/refs.h +2 -1
  241. data/vendor/libgit2/src/refspec.c +18 -1
  242. data/vendor/libgit2/src/refspec.h +3 -1
  243. data/vendor/libgit2/src/remote.c +434 -253
  244. data/vendor/libgit2/src/remote.h +5 -3
  245. data/vendor/libgit2/src/repository.c +197 -111
  246. data/vendor/libgit2/src/repository.h +26 -5
  247. data/vendor/libgit2/src/reset.c +1 -1
  248. data/vendor/libgit2/src/revparse.c +84 -79
  249. data/vendor/libgit2/src/revwalk.c +1 -1
  250. data/vendor/libgit2/src/signature.c +22 -10
  251. data/vendor/libgit2/src/stash.c +5 -2
  252. data/vendor/libgit2/src/status.c +311 -107
  253. data/vendor/libgit2/src/status.h +23 -0
  254. data/vendor/libgit2/src/submodule.c +21 -13
  255. data/vendor/libgit2/src/tag.c +42 -31
  256. data/vendor/libgit2/src/tag.h +2 -3
  257. data/vendor/libgit2/src/thread-utils.h +105 -3
  258. data/vendor/libgit2/src/trace.c +1 -2
  259. data/vendor/libgit2/src/trace.h +3 -3
  260. data/vendor/libgit2/src/transport.c +18 -6
  261. data/vendor/libgit2/src/transports/cred.c +103 -1
  262. data/vendor/libgit2/src/transports/local.c +19 -9
  263. data/vendor/libgit2/src/transports/smart_protocol.c +32 -12
  264. data/vendor/libgit2/src/transports/ssh.c +519 -0
  265. data/vendor/libgit2/src/transports/winhttp.c +3 -1
  266. data/vendor/libgit2/src/tree.c +26 -28
  267. data/vendor/libgit2/src/tree.h +3 -3
  268. data/vendor/libgit2/src/unix/posix.h +2 -0
  269. data/vendor/libgit2/src/util.c +43 -6
  270. data/vendor/libgit2/src/util.h +40 -12
  271. data/vendor/libgit2/src/vector.c +3 -5
  272. data/vendor/libgit2/src/vector.h +9 -0
  273. data/vendor/libgit2/src/win32/dir.c +1 -1
  274. data/vendor/libgit2/src/win32/error.c +2 -0
  275. data/vendor/libgit2/src/win32/findfile.c +3 -6
  276. data/vendor/libgit2/src/win32/posix_w32.c +85 -59
  277. data/vendor/libgit2/src/win32/pthread.c +16 -8
  278. data/vendor/libgit2/src/win32/pthread.h +7 -4
  279. metadata +407 -306
  280. data/test/coverage/HEAD.json +0 -1
  281. data/vendor/libgit2/include/git2/refdb_backend.h +0 -109
  282. data/vendor/libgit2/src/diff_output.c +0 -1819
  283. data/vendor/libgit2/src/diff_output.h +0 -93
@@ -5,10 +5,14 @@
5
5
  * a Linking Exception. For full terms see the included COPYING file.
6
6
  */
7
7
  #include "common.h"
8
- #include "diff.h"
8
+
9
9
  #include "git2/config.h"
10
10
  #include "git2/blob.h"
11
+
12
+ #include "diff.h"
11
13
  #include "hashsig.h"
14
+ #include "path.h"
15
+ #include "fileops.h"
12
16
 
13
17
  static git_diff_delta *diff_delta__dup(
14
18
  const git_diff_delta *d, git_pool *pool)
@@ -18,12 +22,15 @@ static git_diff_delta *diff_delta__dup(
18
22
  return NULL;
19
23
 
20
24
  memcpy(delta, d, sizeof(git_diff_delta));
25
+ GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags);
21
26
 
22
- delta->old_file.path = git_pool_strdup(pool, d->old_file.path);
23
- if (delta->old_file.path == NULL)
24
- goto fail;
27
+ if (d->old_file.path != NULL) {
28
+ delta->old_file.path = git_pool_strdup(pool, d->old_file.path);
29
+ if (delta->old_file.path == NULL)
30
+ goto fail;
31
+ }
25
32
 
26
- if (d->new_file.path != d->old_file.path) {
33
+ if (d->new_file.path != d->old_file.path && d->new_file.path != NULL) {
27
34
  delta->new_file.path = git_pool_strdup(pool, d->new_file.path);
28
35
  if (delta->new_file.path == NULL)
29
36
  goto fail;
@@ -170,7 +177,7 @@ int git_diff_merge(
170
177
  return error;
171
178
  }
172
179
 
173
- static int find_similar__hashsig_for_file(
180
+ int git_diff_find_similar__hashsig_for_file(
174
181
  void **out, const git_diff_file *f, const char *path, void *p)
175
182
  {
176
183
  git_hashsig_option_t opt = (git_hashsig_option_t)p;
@@ -178,7 +185,7 @@ static int find_similar__hashsig_for_file(
178
185
 
179
186
  GIT_UNUSED(f);
180
187
  error = git_hashsig_create_fromfile((git_hashsig **)out, path, opt);
181
-
188
+
182
189
  if (error == GIT_EBUFS) {
183
190
  error = 0;
184
191
  giterr_clear();
@@ -187,15 +194,15 @@ static int find_similar__hashsig_for_file(
187
194
  return error;
188
195
  }
189
196
 
190
- static int find_similar__hashsig_for_buf(
197
+ int git_diff_find_similar__hashsig_for_buf(
191
198
  void **out, const git_diff_file *f, const char *buf, size_t len, void *p)
192
199
  {
193
200
  git_hashsig_option_t opt = (git_hashsig_option_t)p;
194
201
  int error = 0;
195
-
202
+
196
203
  GIT_UNUSED(f);
197
204
  error = git_hashsig_create((git_hashsig **)out, buf, len, opt);
198
-
205
+
199
206
  if (error == GIT_EBUFS) {
200
207
  error = 0;
201
208
  giterr_clear();
@@ -204,13 +211,13 @@ static int find_similar__hashsig_for_buf(
204
211
  return error;
205
212
  }
206
213
 
207
- static void find_similar__hashsig_free(void *sig, void *payload)
214
+ void git_diff_find_similar__hashsig_free(void *sig, void *payload)
208
215
  {
209
216
  GIT_UNUSED(payload);
210
217
  git_hashsig_free(sig);
211
218
  }
212
219
 
213
- static int find_similar__calc_similarity(
220
+ int git_diff_find_similar__calc_similarity(
214
221
  int *score, void *siga, void *sigb, void *payload)
215
222
  {
216
223
  GIT_UNUSED(payload);
@@ -220,7 +227,7 @@ static int find_similar__calc_similarity(
220
227
 
221
228
  #define DEFAULT_THRESHOLD 50
222
229
  #define DEFAULT_BREAK_REWRITE_THRESHOLD 60
223
- #define DEFAULT_TARGET_LIMIT 200
230
+ #define DEFAULT_RENAME_LIMIT 200
224
231
 
225
232
  static int normalize_find_opts(
226
233
  git_diff_list *diff,
@@ -253,12 +260,25 @@ static int normalize_find_opts(
253
260
 
254
261
  /* some flags imply others */
255
262
 
263
+ if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) {
264
+ /* if we are only looking for exact matches, then don't turn
265
+ * MODIFIED items into ADD/DELETE pairs because it's too picky
266
+ */
267
+ opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES);
268
+
269
+ /* similarly, don't look for self-rewrites to split */
270
+ opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES;
271
+ }
272
+
256
273
  if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES)
257
274
  opts->flags |= GIT_DIFF_FIND_RENAMES;
258
275
 
259
276
  if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)
260
277
  opts->flags |= GIT_DIFF_FIND_COPIES;
261
278
 
279
+ if (opts->flags & GIT_DIFF_BREAK_REWRITES)
280
+ opts->flags |= GIT_DIFF_FIND_REWRITES;
281
+
262
282
  #define USE_DEFAULT(X) ((X) == 0 || (X) > 100)
263
283
 
264
284
  if (USE_DEFAULT(opts->rename_threshold))
@@ -275,15 +295,15 @@ static int normalize_find_opts(
275
295
 
276
296
  #undef USE_DEFAULT
277
297
 
278
- if (!opts->target_limit) {
298
+ if (!opts->rename_limit) {
279
299
  int32_t limit = 0;
280
300
 
281
- opts->target_limit = DEFAULT_TARGET_LIMIT;
301
+ opts->rename_limit = DEFAULT_RENAME_LIMIT;
282
302
 
283
303
  if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0)
284
304
  giterr_clear();
285
305
  else if (limit > 0)
286
- opts->target_limit = limit;
306
+ opts->rename_limit = limit;
287
307
  }
288
308
 
289
309
  /* assign the internal metric with whitespace flag as payload */
@@ -291,10 +311,10 @@ static int normalize_find_opts(
291
311
  opts->metric = git__malloc(sizeof(git_diff_similarity_metric));
292
312
  GITERR_CHECK_ALLOC(opts->metric);
293
313
 
294
- opts->metric->file_signature = find_similar__hashsig_for_file;
295
- opts->metric->buffer_signature = find_similar__hashsig_for_buf;
296
- opts->metric->free_signature = find_similar__hashsig_free;
297
- opts->metric->similarity = find_similar__calc_similarity;
314
+ opts->metric->file_signature = git_diff_find_similar__hashsig_for_file;
315
+ opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf;
316
+ opts->metric->free_signature = git_diff_find_similar__hashsig_free;
317
+ opts->metric->similarity = git_diff_find_similar__calc_similarity;
298
318
 
299
319
  if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE)
300
320
  opts->metric->payload = (void *)GIT_HASHSIG_IGNORE_WHITESPACE;
@@ -307,11 +327,12 @@ static int normalize_find_opts(
307
327
  return 0;
308
328
  }
309
329
 
310
- static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
330
+ static int apply_splits_and_deletes(
331
+ git_diff_list *diff, size_t expected_size, bool actually_split)
311
332
  {
312
333
  git_vector onto = GIT_VECTOR_INIT;
313
334
  size_t i;
314
- git_diff_delta *delta;
335
+ git_diff_delta *delta, *deleted;
315
336
 
316
337
  if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0)
317
338
  return -1;
@@ -321,9 +342,11 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
321
342
  if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
322
343
  continue;
323
344
 
324
- if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
325
- git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool);
326
- if (!deleted)
345
+ if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) {
346
+ delta->similarity = 0;
347
+
348
+ /* make new record for DELETED side of split */
349
+ if (!(deleted = diff_delta__dup(delta, &diff->pool)))
327
350
  goto on_error;
328
351
 
329
352
  deleted->status = GIT_DELTA_DELETED;
@@ -334,32 +357,46 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
334
357
  if (git_vector_insert(&onto, deleted) < 0)
335
358
  goto on_error;
336
359
 
337
- delta->status = GIT_DELTA_ADDED;
360
+ if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR)
361
+ delta->status = GIT_DELTA_UNTRACKED;
362
+ else
363
+ delta->status = GIT_DELTA_ADDED;
338
364
  memset(&delta->old_file, 0, sizeof(delta->old_file));
339
365
  delta->old_file.path = delta->new_file.path;
340
366
  delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
341
367
  }
342
368
 
369
+ /* clean up delta before inserting into new list */
370
+ GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags);
371
+
372
+ if (delta->status != GIT_DELTA_COPIED &&
373
+ delta->status != GIT_DELTA_RENAMED &&
374
+ (delta->status != GIT_DELTA_MODIFIED || actually_split))
375
+ delta->similarity = 0;
376
+
377
+ /* insert into new list */
343
378
  if (git_vector_insert(&onto, delta) < 0)
344
379
  goto on_error;
345
380
  }
346
381
 
347
382
  /* cannot return an error past this point */
348
- git_vector_foreach(&diff->deltas, i, delta)
383
+
384
+ /* free deltas from old list that didn't make it to the new one */
385
+ git_vector_foreach(&diff->deltas, i, delta) {
349
386
  if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
350
387
  git__free(delta);
388
+ }
351
389
 
352
390
  /* swap new delta list into place */
353
- git_vector_sort(&onto);
354
391
  git_vector_swap(&diff->deltas, &onto);
355
392
  git_vector_free(&onto);
393
+ git_vector_sort(&diff->deltas);
356
394
 
357
395
  return 0;
358
396
 
359
397
  on_error:
360
398
  git_vector_foreach(&onto, i, delta)
361
399
  git__free(delta);
362
-
363
400
  git_vector_free(&onto);
364
401
 
365
402
  return -1;
@@ -373,21 +410,25 @@ GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
373
410
 
374
411
  static int similarity_calc(
375
412
  git_diff_list *diff,
376
- git_diff_find_options *opts,
413
+ const git_diff_find_options *opts,
377
414
  size_t file_idx,
378
415
  void **cache)
379
416
  {
380
417
  int error = 0;
381
418
  git_diff_file *file = similarity_get_file(diff, file_idx);
382
- git_iterator_type_t src = (file_idx & 1) ? diff->old_src : diff->new_src;
419
+ git_iterator_type_t src = (file_idx & 1) ? diff->new_src : diff->old_src;
383
420
 
384
421
  if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */
385
422
  git_buf path = GIT_BUF_INIT;
386
423
 
387
424
  /* TODO: apply wd-to-odb filters to file data if necessary */
388
425
 
389
- if (!(error = git_buf_joinpath(
390
- &path, git_repository_workdir(diff->repo), file->path)))
426
+ if ((error = git_buf_joinpath(
427
+ &path, git_repository_workdir(diff->repo), file->path)) < 0)
428
+ return error;
429
+
430
+ /* if path is not a regular file, just skip this item */
431
+ if (git_path_isfile(path.ptr))
391
432
  error = opts->metric->file_signature(
392
433
  &cache[file_idx], file, path.ptr, opts->metric->payload);
393
434
 
@@ -398,8 +439,11 @@ static int similarity_calc(
398
439
 
399
440
  /* TODO: add max size threshold a la diff? */
400
441
 
401
- if ((error = git_blob_lookup(&blob, diff->repo, &file->oid)) < 0)
402
- return error;
442
+ if (git_blob_lookup(&blob, diff->repo, &file->oid) < 0) {
443
+ /* if lookup fails, just skip this item in similarity calc */
444
+ giterr_clear();
445
+ return 0;
446
+ }
403
447
 
404
448
  blobsize = git_blob_rawsize(blob);
405
449
  if (!git__is_sizet(blobsize)) /* ? what to do ? */
@@ -415,268 +459,485 @@ static int similarity_calc(
415
459
  return error;
416
460
  }
417
461
 
462
+ #define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0)
463
+
464
+ /* - score < 0 means files cannot be compared
465
+ * - score >= 100 means files are exact match
466
+ * - score == 0 means files are completely different
467
+ */
418
468
  static int similarity_measure(
469
+ int *score,
419
470
  git_diff_list *diff,
420
- git_diff_find_options *opts,
471
+ const git_diff_find_options *opts,
421
472
  void **cache,
422
473
  size_t a_idx,
423
474
  size_t b_idx)
424
475
  {
425
- int score = 0;
426
476
  git_diff_file *a_file = similarity_get_file(diff, a_idx);
427
477
  git_diff_file *b_file = similarity_get_file(diff, b_idx);
478
+ bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
428
479
 
480
+ *score = -1;
481
+
482
+ /* don't try to compare files of different types */
429
483
  if (GIT_MODE_TYPE(a_file->mode) != GIT_MODE_TYPE(b_file->mode))
430
484
  return 0;
431
485
 
432
- if (git_oid_cmp(&a_file->oid, &b_file->oid) == 0)
433
- return 100;
486
+ /* if exact match is requested, force calculation of missing OIDs now */
487
+ if (exact_match) {
488
+ if (git_oid_iszero(&a_file->oid) &&
489
+ diff->old_src == GIT_ITERATOR_TYPE_WORKDIR &&
490
+ !git_diff__oid_for_file(diff->repo, a_file->path,
491
+ a_file->mode, a_file->size, &a_file->oid))
492
+ a_file->flags |= GIT_DIFF_FLAG_VALID_OID;
493
+
494
+ if (git_oid_iszero(&b_file->oid) &&
495
+ diff->new_src == GIT_ITERATOR_TYPE_WORKDIR &&
496
+ !git_diff__oid_for_file(diff->repo, b_file->path,
497
+ b_file->mode, b_file->size, &b_file->oid))
498
+ b_file->flags |= GIT_DIFF_FLAG_VALID_OID;
499
+ }
500
+
501
+ /* check OID match as a quick test */
502
+ if (git_oid__cmp(&a_file->oid, &b_file->oid) == 0) {
503
+ *score = 100;
504
+ return 0;
505
+ }
506
+
507
+ /* don't calculate signatures if we are doing exact match */
508
+ if (exact_match) {
509
+ *score = 0;
510
+ return 0;
511
+ }
434
512
 
435
513
  /* update signature cache if needed */
436
514
  if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0)
437
515
  return -1;
438
516
  if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0)
439
517
  return -1;
440
-
518
+
441
519
  /* some metrics may not wish to process this file (too big / too small) */
442
520
  if (!cache[a_idx] || !cache[b_idx])
443
521
  return 0;
444
522
 
445
523
  /* compare signatures */
446
- if (opts->metric->similarity(
447
- &score, cache[a_idx], cache[b_idx], opts->metric->payload) < 0)
448
- return -1;
524
+ return opts->metric->similarity(
525
+ score, cache[a_idx], cache[b_idx], opts->metric->payload);
526
+ }
527
+
528
+ static int calc_self_similarity(
529
+ git_diff_list *diff,
530
+ const git_diff_find_options *opts,
531
+ size_t delta_idx,
532
+ void **cache)
533
+ {
534
+ int error, similarity = -1;
535
+ git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
449
536
 
450
- /* clip score */
451
- if (score < 0)
452
- score = 0;
453
- else if (score > 100)
454
- score = 100;
537
+ if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0)
538
+ return 0;
455
539
 
456
- return score;
540
+ error = similarity_measure(
541
+ &similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1);
542
+ if (error < 0)
543
+ return error;
544
+
545
+ if (similarity >= 0) {
546
+ delta->similarity = (uint32_t)similarity;
547
+ delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY;
548
+ }
549
+
550
+ return 0;
457
551
  }
458
552
 
459
- #define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0)
553
+ static bool is_rename_target(
554
+ git_diff_list *diff,
555
+ const git_diff_find_options *opts,
556
+ size_t delta_idx,
557
+ void **cache)
558
+ {
559
+ git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
560
+
561
+ /* skip things that aren't plain blobs */
562
+ if (!GIT_MODE_ISBLOB(delta->new_file.mode))
563
+ return false;
564
+
565
+ /* only consider ADDED, RENAMED, COPIED, and split MODIFIED as
566
+ * targets; maybe include UNTRACKED and IGNORED if requested.
567
+ */
568
+ switch (delta->status) {
569
+ case GIT_DELTA_UNMODIFIED:
570
+ case GIT_DELTA_DELETED:
571
+ return false;
572
+
573
+ case GIT_DELTA_MODIFIED:
574
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) &&
575
+ !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES))
576
+ return false;
577
+
578
+ if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
579
+ return false;
580
+
581
+ if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
582
+ delta->similarity < opts->break_rewrite_threshold) {
583
+ delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
584
+ break;
585
+ }
586
+ if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
587
+ delta->similarity < opts->rename_from_rewrite_threshold)
588
+ break;
589
+
590
+ return false;
591
+
592
+ case GIT_DELTA_UNTRACKED:
593
+ case GIT_DELTA_IGNORED:
594
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED))
595
+ return false;
596
+ break;
597
+
598
+ default: /* all other status values should be checked */
599
+ break;
600
+ }
601
+
602
+ delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET;
603
+ return true;
604
+ }
605
+ /* Decide whether the delta at delta_idx may act as the "from" side of a
+  * rename or copy, and tag it with GIT_DIFF_FLAG__IS_RENAME_SOURCE if so.
+  *
+  * diff      - diff list whose deltas vector is indexed by delta_idx
+  * opts      - find options; only its flags and the break_rewrite /
+  *             rename_from_rewrite thresholds are read here
+  * delta_idx - position of the delta inside diff->deltas
+  * cache     - forwarded unchanged to calc_self_similarity()
+  *
+  * Returns true when the delta is accepted as a source, false otherwise.
+  * Side effects: may set GIT_DIFF_FLAG__TO_SPLIT on the delta, and sets
+  * GIT_DIFF_FLAG__IS_RENAME_SOURCE on every accepted delta.
+  */
606
+ static bool is_rename_source(
607
+ git_diff_list *diff,
608
+ const git_diff_find_options *opts,
609
+ size_t delta_idx,
610
+ void **cache)
611
+ {
612
+ git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
613
+
614
+ /* skip things that aren't blobs */
615
+ if (!GIT_MODE_ISBLOB(delta->old_file.mode))
616
+ return false;
617
+
618
+ switch (delta->status) {
619
+ case GIT_DELTA_ADDED:
620
+ case GIT_DELTA_UNTRACKED:
621
+ case GIT_DELTA_IGNORED:
622
+ return false; /* these statuses are never accepted as a source */
623
+
624
+ case GIT_DELTA_DELETED:
625
+ case GIT_DELTA_TYPECHANGE:
626
+ break; /* accepted unconditionally */
627
+
628
+ case GIT_DELTA_UNMODIFIED:
629
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
630
+ return false;
631
+ break;
632
+
633
+ default: /* MODIFIED, RENAMED, COPIED */
634
+ /* if we're finding copies, this could be a source */
635
+ if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES))
636
+ break;
637
+
638
+ /* otherwise, this is only a source if we can split it */
639
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) &&
640
+ !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES))
641
+ return false;
642
+ /* NOTE(review): delta->similarity, read below, is presumably filled in by calc_self_similarity(), which is defined outside this view -- confirm */
643
+ if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
644
+ return false; /* a negative result is not propagated; the delta is just rejected */
645
+
646
+ if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
647
+ delta->similarity < opts->break_rewrite_threshold) {
648
+ delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
649
+ break;
650
+ }
651
+
652
+ if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
653
+ delta->similarity < opts->rename_from_rewrite_threshold)
654
+ break;
655
+
656
+ return false; /* similarity is below neither applicable threshold */
657
+ }
658
+ /* every path that left the switch via break accepted the delta */
659
+ delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE;
660
+ return true;
661
+ }
662
+ /* True when the delta's status is GIT_DELTA_TYPECHANGE or it carries the
+  * GIT_DIFF_FLAG__TO_SPLIT flag. Does not modify the delta.
+  */
663
+ GIT_INLINE(bool) delta_is_split(git_diff_delta *delta)
664
+ {
665
+ return (delta->status == GIT_DELTA_TYPECHANGE ||
666
+ (delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0);
667
+ }
668
+ /* True when the delta's status is one of GIT_DELTA_ADDED,
+  * GIT_DELTA_UNTRACKED or GIT_DELTA_IGNORED. Does not modify the delta.
+  */
670
+ GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
671
+ {
672
+ return (delta->status == GIT_DELTA_ADDED ||
673
+ delta->status == GIT_DELTA_UNTRACKED ||
674
+ delta->status == GIT_DELTA_IGNORED);
675
+ }
675
+ /* Turn `to` into a RENAMED record whose old side comes from `from`.
+  *
+  * to         - delta to rewrite in place
+  * from       - delta supplying the old_file entry; it is only read
+  * similarity - score stored in to->similarity
+  *
+  * Also clears GIT_DIFF_FLAG__TO_SPLIT on `to`; to->new_file is untouched.
+  */
676
+ GIT_INLINE(void) delta_make_rename(
677
+ git_diff_delta *to, const git_diff_delta *from, uint32_t similarity)
678
+ {
679
+ to->status = GIT_DELTA_RENAMED;
680
+ to->similarity = similarity;
681
+ memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); /* struct copy of the whole old_file entry */
682
+ to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
683
+ }
684
+ /* One best-match record, kept per delta by git_diff_find_similar() in its
+  * match_srcs / match_tgts arrays (both indexed by position in diff->deltas).
+  */
685
+ typedef struct {
686
+ uint32_t idx; /* index in diff->deltas of the delta on the other side of the match */
687
+ uint32_t similarity; /* score of that match; 0 is treated as "no match recorded" */
688
+ } diff_find_match;
460
689
 
461
690
  int git_diff_find_similar(
462
691
  git_diff_list *diff,
463
692
  git_diff_find_options *given_opts)
464
693
  {
465
- size_t i, j, cache_size, *matches;
694
+ size_t i, j, sigcache_size;
466
695
  int error = 0, similarity;
467
696
  git_diff_delta *from, *to;
468
697
  git_diff_find_options opts;
469
- size_t tried_targets, num_rewrites = 0;
470
- void **cache;
698
+ size_t num_srcs = 0, num_tgts = 0, tried_srcs = 0, tried_tgts = 0;
699
+ size_t num_rewrites = 0, num_updates = 0, num_bumped = 0;
700
+ void **sigcache; /* cache of similarity metric file signatures */
701
+ diff_find_match *match_srcs = NULL, *match_tgts = NULL, *best_match;
702
+ git_diff_file swap;
471
703
 
472
704
  if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
473
705
  return error;
474
706
 
475
- /* TODO: maybe abort if deltas.length > target_limit ??? */
476
-
477
- cache_size = diff->deltas.length * 2; /* must store b/c length may change */
478
- cache = git__calloc(cache_size, sizeof(void *));
479
- GITERR_CHECK_ALLOC(cache);
707
+ /* TODO: maybe abort if deltas.length > rename_limit ??? */
708
+ if (!git__is_uint32(diff->deltas.length))
709
+ return 0;
480
710
 
481
- matches = git__calloc(diff->deltas.length, sizeof(size_t));
482
- GITERR_CHECK_ALLOC(matches);
711
+ sigcache_size = diff->deltas.length * 2; /* keep size b/c diff may change */
712
+ sigcache = git__calloc(sigcache_size, sizeof(void *));
713
+ GITERR_CHECK_ALLOC(sigcache);
483
714
 
484
- /* first break MODIFIED records that are too different (if requested) */
715
+ /* Label rename sources and targets
716
+ *
717
+ * This will also set self-similarity scores for MODIFIED files and
718
+ * mark them for splitting if break-rewrites is enabled
719
+ */
720
+ git_vector_foreach(&diff->deltas, i, to) {
721
+ if (is_rename_source(diff, &opts, i, sigcache))
722
+ ++num_srcs;
485
723
 
486
- if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) {
487
- git_vector_foreach(&diff->deltas, i, from) {
488
- if (from->status != GIT_DELTA_MODIFIED)
489
- continue;
724
+ if (is_rename_target(diff, &opts, i, sigcache))
725
+ ++num_tgts;
726
+ }
490
727
 
491
- similarity = similarity_measure(
492
- diff, &opts, cache, 2 * i, 2 * i + 1);
728
+ /* if there are no candidate srcs or tgts, we're done */
729
+ if (!num_srcs || !num_tgts)
730
+ goto cleanup;
493
731
 
494
- if (similarity < 0) {
495
- error = similarity;
496
- goto cleanup;
497
- }
732
+ match_tgts = git__calloc(diff->deltas.length, sizeof(diff_find_match));
733
+ GITERR_CHECK_ALLOC(match_tgts);
734
+ match_srcs = git__calloc(diff->deltas.length, sizeof(diff_find_match));
735
+ GITERR_CHECK_ALLOC(match_srcs);
498
736
 
499
- if ((unsigned int)similarity < opts.break_rewrite_threshold) {
500
- from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
501
- num_rewrites++;
502
- }
503
- }
504
- }
505
-
506
- /* next find the most similar delta for each rename / copy candidate */
737
+ /*
738
+ * Find best-fit matches for rename / copy candidates
739
+ */
507
740
 
508
- git_vector_foreach(&diff->deltas, i, from) {
509
- tried_targets = 0;
741
+ find_best_matches:
742
+ tried_tgts = num_bumped = 0;
510
743
 
511
- /* skip things that aren't blobs */
512
- if (GIT_MODE_TYPE(from->old_file.mode) !=
513
- GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
744
+ git_vector_foreach(&diff->deltas, i, to) {
745
+ /* skip things that are not rename targets */
746
+ if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
514
747
  continue;
515
748
 
516
- /* don't check UNMODIFIED files as source unless given option */
517
- if (from->status == GIT_DELTA_UNMODIFIED &&
518
- !FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
519
- continue;
749
+ tried_srcs = 0;
520
750
 
521
- /* skip all but DELETED files unless copy detection is on */
522
- if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES) &&
523
- from->status != GIT_DELTA_DELETED &&
524
- (from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
525
- continue;
751
+ git_vector_foreach(&diff->deltas, j, from) {
752
+ /* skip things that are not rename sources */
753
+ if ((from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
754
+ continue;
526
755
 
527
- git_vector_foreach(&diff->deltas, j, to) {
756
+ /* calculate similarity for this pair and find best match */
528
757
  if (i == j)
529
- continue;
758
+ similarity = -1; /* don't measure self-similarity here */
759
+ else if ((error = similarity_measure(
760
+ &similarity, diff, &opts, sigcache, 2 * j, 2 * i + 1)) < 0)
761
+ goto cleanup;
530
762
 
531
- /* skip things that aren't blobs */
532
- if (GIT_MODE_TYPE(to->new_file.mode) !=
533
- GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
534
- continue;
763
+ /* if this pairing is better for the src and the tgt, keep it */
764
+ if (similarity > 0 &&
765
+ match_tgts[i].similarity < (uint32_t)similarity &&
766
+ match_srcs[j].similarity < (uint32_t)similarity)
767
+ {
768
+ if (match_tgts[i].similarity > 0) {
769
+ match_tgts[match_srcs[j].idx].similarity = 0;
770
+ match_srcs[match_tgts[i].idx].similarity = 0;
771
+ ++num_bumped;
772
+ }
773
+
774
+ match_tgts[i].similarity = (uint32_t)similarity;
775
+ match_tgts[i].idx = (uint32_t)j;
776
+
777
+ match_srcs[j].similarity = (uint32_t)similarity;
778
+ match_srcs[j].idx = (uint32_t)i;
779
+ }
535
780
 
536
- switch (to->status) {
537
- case GIT_DELTA_ADDED:
538
- case GIT_DELTA_UNTRACKED:
539
- case GIT_DELTA_RENAMED:
540
- case GIT_DELTA_COPIED:
541
- break;
542
- case GIT_DELTA_MODIFIED:
543
- if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
544
- continue;
781
+ if (++tried_srcs >= num_srcs)
545
782
  break;
546
- default:
547
- /* only the above status values should be checked */
548
- continue;
549
- }
550
783
 
551
- /* cap on maximum files we'll examine (per "from" file) */
552
- if (++tried_targets > opts.target_limit)
784
+ /* cap on maximum sources we'll examine (per "to" file) */
785
+ if (tried_srcs > opts.rename_limit)
553
786
  break;
787
+ }
554
788
 
555
- /* calculate similarity and see if this pair beats the
556
- * similarity score of the current best pair.
557
- */
558
- similarity = similarity_measure(
559
- diff, &opts, cache, 2 * i, 2 * j + 1);
789
+ if (++tried_tgts >= num_tgts)
790
+ break;
791
+ }
560
792
 
561
- if (similarity < 0) {
562
- error = similarity;
563
- goto cleanup;
564
- }
793
+ if (num_bumped > 0) /* try again if we bumped some items */
794
+ goto find_best_matches;
565
795
 
566
- if (to->similarity < (unsigned int)similarity) {
567
- to->similarity = (unsigned int)similarity;
568
- matches[j] = i + 1;
569
- }
570
- }
571
- }
796
+ /*
797
+ * Rewrite the diffs with renames / copies
798
+ */
572
799
 
573
- /* next rewrite the diffs with renames / copies */
800
+ tried_tgts = 0;
574
801
 
575
- git_vector_foreach(&diff->deltas, j, to) {
576
- if (!matches[j]) {
577
- assert(to->similarity == 0);
802
+ git_vector_foreach(&diff->deltas, i, to) {
803
+ /* skip things that are not rename targets */
804
+ if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
578
805
  continue;
579
- }
580
806
 
581
- i = matches[j] - 1;
582
- from = GIT_VECTOR_GET(&diff->deltas, i);
583
- assert(from);
584
-
585
- /* four possible outcomes here:
586
- * 1. old DELETED and if over rename threshold,
587
- * new becomes RENAMED and old goes away
588
- * 2. old SPLIT and if over rename threshold,
589
- * new becomes RENAMED and old becomes ADDED (clear SPLIT)
590
- * 3. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and
591
- * old is more similar to new than it is to itself, in which
592
- * case, new becomes RENAMED and old becomed ADDED
593
- * 4. otherwise if over copy threshold, new becomes COPIED
807
+ /* check if this delta was the target of a similarity */
808
+ best_match = &match_tgts[i];
809
+ if (!best_match->similarity)
810
+ continue;
811
+
812
+ j = best_match->idx;
813
+ from = GIT_VECTOR_GET(&diff->deltas, j);
814
+
815
+ /* possible scenarios:
816
+ * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
817
+ * 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE
818
+ * 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME
819
+ * 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT
820
+ * 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY
594
821
  */
595
822
 
596
823
  if (from->status == GIT_DELTA_DELETED) {
597
- if (to->similarity < opts.rename_threshold) {
598
- to->similarity = 0;
599
- continue;
600
- }
601
824
 
602
- to->status = GIT_DELTA_RENAMED;
603
- memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
825
+ if (delta_is_new_only(to)) {
604
826
 
605
- from->flags |= GIT_DIFF_FLAG__TO_DELETE;
606
- num_rewrites++;
827
+ if (best_match->similarity < opts.rename_threshold)
828
+ continue;
607
829
 
608
- continue;
609
- }
830
+ delta_make_rename(to, from, best_match->similarity);
610
831
 
611
- if (from->status == GIT_DELTA_MODIFIED &&
612
- (from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0)
613
- {
614
- if (to->similarity < opts.rename_threshold) {
615
- to->similarity = 0;
616
- continue;
617
- }
832
+ from->flags |= GIT_DIFF_FLAG__TO_DELETE;
833
+ num_rewrites++;
834
+ } else {
835
+ assert(delta_is_split(to));
618
836
 
619
- to->status = GIT_DELTA_RENAMED;
620
- memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
837
+ if (best_match->similarity < opts.rename_from_rewrite_threshold)
838
+ continue;
621
839
 
622
- from->status = GIT_DELTA_ADDED;
623
- from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
624
- memset(&from->old_file, 0, sizeof(from->old_file));
625
- num_rewrites--;
840
+ memcpy(&swap, &to->old_file, sizeof(swap));
626
841
 
627
- continue;
628
- }
842
+ delta_make_rename(to, from, best_match->similarity);
843
+ num_rewrites--;
629
844
 
630
- if (from->status == GIT_DELTA_MODIFIED &&
631
- FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
632
- to->similarity > opts.rename_threshold)
633
- {
634
- similarity = similarity_measure(
635
- diff, &opts, cache, 2 * i, 2 * i + 1);
845
+ from->status = GIT_DELTA_DELETED;
846
+ memcpy(&from->old_file, &swap, sizeof(from->old_file));
847
+ memset(&from->new_file, 0, sizeof(from->new_file));
848
+ from->new_file.path = from->old_file.path;
849
+ from->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
636
850
 
637
- if (similarity < 0) {
638
- error = similarity;
639
- goto cleanup;
851
+ num_updates++;
640
852
  }
853
+ }
854
+
855
+ else if (delta_is_split(from)) {
856
+
857
+ if (delta_is_new_only(to)) {
641
858
 
642
- if ((unsigned int)similarity < opts.rename_from_rewrite_threshold) {
643
- to->status = GIT_DELTA_RENAMED;
644
- memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
859
+ if (best_match->similarity < opts.rename_threshold)
860
+ continue;
861
+
862
+ delta_make_rename(to, from, best_match->similarity);
645
863
 
646
- from->status = GIT_DELTA_ADDED;
864
+ from->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
865
+ GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
647
866
  memset(&from->old_file, 0, sizeof(from->old_file));
648
- from->old_file.path = to->old_file.path;
867
+ from->old_file.path = from->new_file.path;
649
868
  from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
650
869
 
651
- continue;
870
+ from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
871
+ num_rewrites--;
872
+
873
+ num_updates++;
874
+ } else {
875
+ assert(delta_is_split(from));
876
+
877
+ if (best_match->similarity < opts.rename_from_rewrite_threshold)
878
+ continue;
879
+
880
+ memcpy(&swap, &to->old_file, sizeof(swap));
881
+
882
+ delta_make_rename(to, from, best_match->similarity);
883
+ num_rewrites--;
884
+ num_updates++;
885
+
886
+ memcpy(&from->old_file, &swap, sizeof(from->old_file));
887
+
888
+ /* if we've just swapped the new element into the correct
889
+ * place, clear the SPLIT flag
890
+ */
891
+ if (match_tgts[j].idx == i &&
892
+ match_tgts[j].similarity >
893
+ opts.rename_from_rewrite_threshold) {
894
+
895
+ from->status = GIT_DELTA_RENAMED;
896
+ from->similarity = match_tgts[j].similarity;
897
+ match_tgts[j].similarity = 0;
898
+ from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
899
+ num_rewrites--;
900
+ }
901
+ /* otherwise, if we just overwrote a source, update mapping */
902
+ else if (j > i && match_srcs[i].similarity > 0) {
903
+ match_tgts[match_srcs[i].idx].idx = (uint32_t)j;
904
+ }
905
+
906
+ num_updates++;
652
907
  }
653
908
  }
654
909
 
655
- if (to->similarity < opts.copy_threshold) {
656
- to->similarity = 0;
657
- continue;
658
- }
910
+ else if (delta_is_new_only(to)) {
911
+ if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
912
+ best_match->similarity < opts.copy_threshold)
913
+ continue;
659
914
 
660
- /* convert "to" to a COPIED record */
661
- to->status = GIT_DELTA_COPIED;
662
- memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
915
+ to->status = GIT_DELTA_COPIED;
916
+ to->similarity = best_match->similarity;
917
+ memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
918
+
919
+ num_updates++;
920
+ }
663
921
  }
664
922
 
665
- if (num_rewrites > 0) {
666
- assert(num_rewrites < diff->deltas.length);
923
+ /*
924
+ * Actually split and delete entries as needed
925
+ */
667
926
 
927
+ if (num_rewrites > 0 || num_updates > 0)
668
928
  error = apply_splits_and_deletes(
669
- diff, diff->deltas.length - num_rewrites);
670
- }
929
+ diff, diff->deltas.length - num_rewrites,
930
+ FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
671
931
 
672
932
  cleanup:
673
- git__free(matches);
933
+ git__free(match_srcs);
934
+ git__free(match_tgts);
674
935
 
675
- for (i = 0; i < cache_size; ++i) {
676
- if (cache[i] != NULL)
677
- opts.metric->free_signature(cache[i], opts.metric->payload);
936
+ for (i = 0; i < sigcache_size; ++i) {
937
+ if (sigcache[i] != NULL)
938
+ opts.metric->free_signature(sigcache[i], opts.metric->payload);
678
939
  }
679
- git__free(cache);
940
+ git__free(sigcache);
680
941
 
681
942
  if (!given_opts || !given_opts->metric)
682
943
  git__free(opts.metric);