jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +69 -0
  2. package/Cargo.lock +1323 -0
  3. package/Cargo.toml +54 -0
  4. package/LICENSE +21 -0
  5. package/README.md +372 -0
  6. package/docs/api-parity.md +49 -0
  7. package/docs/cloning-plan.md +281 -0
  8. package/docs/compat-baseline.md +535 -0
  9. package/docs/format-porting.md +86 -0
  10. package/docs/junior-task-template.md +62 -0
  11. package/docs/junior-workflow.md +87 -0
  12. package/docs/migrating-from-jscpd.md +193 -0
  13. package/docs/npm-release.md +116 -0
  14. package/docs/public-benchmark-suite.md +81 -0
  15. package/docs/release-checklist.md +200 -0
  16. package/docs/release-decisions.md +103 -0
  17. package/docs/release-readiness.md +51 -0
  18. package/docs/upstream-bugs.md +501 -0
  19. package/docs/upstream-issue-drafts.md +393 -0
  20. package/docs/user-guide.md +309 -0
  21. package/examples/dump_oxc_tokens.rs +112 -0
  22. package/examples/library_api.rs +42 -0
  23. package/npm/bin/jscpd-rs.js +6 -0
  24. package/npm/bin/jscpd-server.js +6 -0
  25. package/npm/lib/run-binary.js +68 -0
  26. package/npm/scripts/postinstall.js +50 -0
  27. package/package.json +53 -0
  28. package/skills/dry-refactoring/SKILL.md +63 -0
  29. package/skills/jscpd/SKILL.md +85 -0
  30. package/src/app.rs +512 -0
  31. package/src/bin/jscpd-server.rs +429 -0
  32. package/src/blame.rs +130 -0
  33. package/src/cli/config.rs +543 -0
  34. package/src/cli/parsing.rs +301 -0
  35. package/src/cli/tests.rs +543 -0
  36. package/src/cli.rs +671 -0
  37. package/src/detector/matching/secondary.rs +387 -0
  38. package/src/detector/matching.rs +274 -0
  39. package/src/detector/model.rs +190 -0
  40. package/src/detector/prepare.rs +71 -0
  41. package/src/detector/skip_local.rs +40 -0
  42. package/src/detector/statistics.rs +138 -0
  43. package/src/detector/store.rs +96 -0
  44. package/src/detector/tests.rs +238 -0
  45. package/src/detector.rs +265 -0
  46. package/src/files/discovery.rs +508 -0
  47. package/src/files/gitignore.rs +203 -0
  48. package/src/files/paths.rs +68 -0
  49. package/src/files/shebang.rs +106 -0
  50. package/src/files/tests.rs +523 -0
  51. package/src/files.rs +25 -0
  52. package/src/formats.rs +570 -0
  53. package/src/lib.rs +433 -0
  54. package/src/main.rs +26 -0
  55. package/src/report/ai.rs +125 -0
  56. package/src/report/badge.rs +238 -0
  57. package/src/report/console.rs +180 -0
  58. package/src/report/console_common.rs +37 -0
  59. package/src/report/console_full.rs +139 -0
  60. package/src/report/csv.rs +65 -0
  61. package/src/report/escape.rs +8 -0
  62. package/src/report/file_output.rs +28 -0
  63. package/src/report/html/assets.rs +47 -0
  64. package/src/report/html.rs +336 -0
  65. package/src/report/json.rs +119 -0
  66. package/src/report/markdown.rs +125 -0
  67. package/src/report/sarif.rs +302 -0
  68. package/src/report/silent.rs +22 -0
  69. package/src/report/source.rs +38 -0
  70. package/src/report/summary.rs +50 -0
  71. package/src/report/test_support.rs +133 -0
  72. package/src/report/threshold.rs +76 -0
  73. package/src/report/xcode.rs +90 -0
  74. package/src/report/xml.rs +119 -0
  75. package/src/report.rs +250 -0
  76. package/src/server/mcp.rs +942 -0
  77. package/src/server.rs +1081 -0
  78. package/src/tokenizer/apex.rs +97 -0
  79. package/src/tokenizer/blocks.rs +532 -0
  80. package/src/tokenizer/embedded.rs +106 -0
  81. package/src/tokenizer/generic.rs +511 -0
  82. package/src/tokenizer/hash.rs +27 -0
  83. package/src/tokenizer/ignore.rs +33 -0
  84. package/src/tokenizer/line_index.rs +33 -0
  85. package/src/tokenizer/markdown.rs +289 -0
  86. package/src/tokenizer/markup_attrs.rs +289 -0
  87. package/src/tokenizer/oxc/fallback.rs +275 -0
  88. package/src/tokenizer/oxc/jsx.rs +168 -0
  89. package/src/tokenizer/oxc/kind.rs +177 -0
  90. package/src/tokenizer/oxc/lexical.rs +67 -0
  91. package/src/tokenizer/oxc.rs +659 -0
  92. package/src/tokenizer/scan.rs +88 -0
  93. package/src/tokenizer/tap.rs +150 -0
  94. package/src/tokenizer/tests.rs +915 -0
  95. package/src/tokenizer.rs +328 -0
  96. package/src/verbose.rs +195 -0
package/src/server.rs ADDED
@@ -0,0 +1,1081 @@
1
+ use std::collections::HashSet;
2
+ use std::fmt::Write as _;
3
+ use std::net::{SocketAddr, ToSocketAddrs};
4
+ use std::path::{Path, PathBuf};
5
+ use std::sync::{Arc, RwLock};
6
+
7
+ use anyhow::{Context, Result, bail};
8
+ use axum::body::Bytes;
9
+ use axum::extract::DefaultBodyLimit;
10
+ use axum::extract::State;
11
+ use axum::http::header::CONTENT_TYPE;
12
+ use axum::http::{HeaderMap, Method, StatusCode, Uri};
13
+ use axum::response::{IntoResponse, Response};
14
+ use axum::routing::{get, post};
15
+ use axum::{Json, Router};
16
+ use serde::{Deserialize, Serialize};
17
+ use serde_json::Value;
18
+ use time::OffsetDateTime;
19
+ use time::format_description::well_known::Rfc3339;
20
+
21
+ use crate::cli::{Options, store_warning};
22
+ use crate::detector::{DetectionResult, Fragment, Statistics};
23
+ use crate::detector::{PreparedSourceDraft, detect_prepared_drafts, prepare_source_drafts};
24
+ use crate::files::{self, SourceFile};
25
+
26
+ mod mcp;
27
+
28
/// Cloneable handle to the server state; every clone shares the same `Arc<RwLock<ServiceState>>`.
+ #[derive(Clone)]
29
+ pub struct ServerService {
30
/// Shared, lock-protected state read and written by all handlers.
+ state: Arc<RwLock<ServiceState>>,
31
+ }
32
+
33
/// Mutable data kept behind the `RwLock` held by `ServerService`.
+ #[derive(Clone)]
34
+ struct ServiceState {
35
/// Directory used as the only scan path and as the base for relative file names in results.
+ working_directory: PathBuf,
36
/// Base options; detection runs use a copy with reporters/silent/no_tips overridden.
+ options: Options,
37
/// Prepared drafts from the last successful scan; cloned for every snippet check.
+ project_drafts: Vec<PreparedSourceDraft>,
38
/// Statistics from the last successful scan; `None` until one has completed.
+ statistics: Option<Statistics>,
39
/// RFC 3339 timestamp recorded when the last successful scan finished.
+ last_scan_time: Option<String>,
40
/// Set while `recheck` is running; concurrent rechecks and snippet checks are rejected.
+ is_scanning: bool,
41
/// Incremented per snippet check to build the synthetic `<snippet>/snippet_…` source id.
+ snippet_counter: u64,
42
/// Session ids handed out by `create_mcp_session`.
+ mcp_sessions: HashSet<String>,
43
+ }
44
+
45
+ impl ServerService {
46
+ pub fn new(working_directory: PathBuf, options: Options) -> Self {
47
+ Self {
48
+ state: Arc::new(RwLock::new(ServiceState {
49
+ working_directory,
50
+ options,
51
+ project_drafts: Vec::new(),
52
+ statistics: None,
53
+ last_scan_time: None,
54
+ is_scanning: false,
55
+ snippet_counter: 0,
56
+ mcp_sessions: HashSet::new(),
57
+ })),
58
+ }
59
+ }
60
+
61
+ pub fn initialize(&self) -> Result<()> {
62
+ self.recheck()
63
+ }
64
+
65
+ pub fn recheck(&self) -> Result<()> {
66
+ let options = {
67
+ let mut state = self.state.write().expect("server state lock poisoned");
68
+ if state.is_scanning {
69
+ bail!(SCAN_IN_PROGRESS);
70
+ }
71
+ state.is_scanning = true;
72
+ service_detection_options(&state)
73
+ };
74
+
75
+ let result = scan_project(&options);
76
+ let mut state = self.state.write().expect("server state lock poisoned");
77
+ state.is_scanning = false;
78
+
79
+ let (project_drafts, detection_result) = result?;
80
+ state.project_drafts = project_drafts;
81
+ state.statistics = Some(detection_result.statistics);
82
+ state.last_scan_time = Some(now_rfc3339());
83
+ Ok(())
84
+ }
85
+
86
+ pub fn check_snippet(&self, request: CheckSnippetRequest) -> Result<CheckSnippetResponse> {
87
+ if request.code.trim().is_empty() {
88
+ bail!(FIELD_CODE_EMPTY);
89
+ }
90
+
91
+ let (options, project_drafts, snippet_id, working_directory) = {
92
+ let mut state = self.state.write().expect("server state lock poisoned");
93
+ if state.is_scanning {
94
+ bail!(SCAN_IN_PROGRESS);
95
+ }
96
+ if state.statistics.is_none() {
97
+ bail!(NOT_INITIALIZED);
98
+ }
99
+ let snippet_id = format!("<snippet>/snippet_{:08x}", state.snippet_counter);
100
+ state.snippet_counter += 1;
101
+ (
102
+ service_detection_options(&state),
103
+ state.project_drafts.clone(),
104
+ snippet_id,
105
+ state.working_directory.clone(),
106
+ )
107
+ };
108
+
109
+ let total_lines = request.code.split('\n').count();
110
+ let mut prepared_drafts = project_drafts;
111
+ prepared_drafts.extend(prepare_source_drafts(
112
+ vec![SourceFile {
113
+ source_id: snippet_id.clone(),
114
+ format: request.format,
115
+ content: request.code,
116
+ }],
117
+ &options,
118
+ ));
119
+ let result = detect_prepared_drafts(prepared_drafts, &options);
120
+ let duplications = result
121
+ .clones
122
+ .iter()
123
+ .filter_map(|clone| {
124
+ let snippet_is_a = clone.duplication_a.source_id == snippet_id;
125
+ let snippet_is_b = clone.duplication_b.source_id == snippet_id;
126
+ if snippet_is_a == snippet_is_b {
127
+ return None;
128
+ }
129
+ let (snippet, codebase) = if snippet_is_a {
130
+ (&clone.duplication_a, &clone.duplication_b)
131
+ } else {
132
+ (&clone.duplication_b, &clone.duplication_a)
133
+ };
134
+ Some(SnippetDuplication {
135
+ snippet_location: SnippetLocation::from_fragment(snippet),
136
+ codebase_location: DuplicationLocation::from_fragment(
137
+ codebase,
138
+ &working_directory,
139
+ &result,
140
+ ),
141
+ lines_count: fragment_line_count(snippet),
142
+ })
143
+ })
144
+ .collect::<Vec<_>>();
145
+ let statistics = duplication_statistics(&duplications, total_lines);
146
+
147
+ Ok(CheckSnippetResponse {
148
+ duplications,
149
+ statistics,
150
+ })
151
+ }
152
+
153
+ pub fn statistics(&self) -> StatsResponse {
154
+ let state = self.state.read().expect("server state lock poisoned");
155
+ StatsResponse {
156
+ statistics: state.statistics.clone(),
157
+ timestamp: state.last_scan_time.clone().unwrap_or_else(now_rfc3339),
158
+ }
159
+ }
160
+
161
+ pub fn health(&self) -> HealthResponse {
162
+ let state = self.state.read().expect("server state lock poisoned");
163
+ HealthResponse {
164
+ status: if state.is_scanning {
165
+ "initializing"
166
+ } else {
167
+ "ready"
168
+ },
169
+ working_directory: state.working_directory.display().to_string(),
170
+ last_scan_time: state.last_scan_time.clone(),
171
+ }
172
+ }
173
+
174
+ pub(crate) fn create_mcp_session(&self) -> String {
175
+ let mut state = self.state.write().expect("server state lock poisoned");
176
+ let session_id = new_mcp_session_id();
177
+ state.mcp_sessions.insert(session_id.clone());
178
+ session_id
179
+ }
180
+
181
+ pub(crate) fn has_mcp_session(&self, session_id: &str) -> bool {
182
+ let state = self.state.read().expect("server state lock poisoned");
183
+ state.mcp_sessions.contains(session_id)
184
+ }
185
+ }
186
+
187
+ fn detection_options(options: &Options) -> Options {
188
+ let mut options = options.clone();
189
+ options.reporters = vec!["json".to_string()];
190
+ options.silent = true;
191
+ options.no_tips = true;
192
+ options
193
+ }
194
+
195
+ fn service_detection_options(state: &ServiceState) -> Options {
196
+ let mut options = detection_options(&state.options);
197
+ options.paths = vec![state.working_directory.clone()];
198
+ options
199
+ }
200
+
201
+ fn scan_project(options: &Options) -> Result<(Vec<PreparedSourceDraft>, DetectionResult)> {
202
+ let files = files::discover(options)?;
203
+ let project_drafts = prepare_source_drafts(files, options);
204
+ let result = detect_prepared_drafts(project_drafts.clone(), options);
205
+ Ok((project_drafts, result))
206
+ }
207
+
208
+ pub fn create_router(service: ServerService) -> Router {
209
+ Router::new()
210
+ .route("/", get(api_info))
211
+ .route("/api/check", post(check_snippet).fallback(not_found))
212
+ .route("/api/recheck", post(recheck).fallback(not_found))
213
+ .route("/api/stats", get(stats).fallback(not_found))
214
+ .route("/api/health", get(health).fallback(not_found))
215
+ .route(
216
+ "/mcp",
217
+ post(mcp::post_mcp)
218
+ .get(mcp::method_not_allowed)
219
+ .fallback(not_found),
220
+ )
221
+ .fallback(not_found)
222
+ .layer(DefaultBodyLimit::max(10 * 1024 * 1024))
223
+ .with_state(service)
224
+ }
225
+
226
+ pub async fn serve(options: Options, host: &str, port: u16) -> Result<()> {
227
+ let working_directory = server_working_directory(&options);
228
+ serve_with_working_directory(options, working_directory, host, port).await
229
+ }
230
+
231
+ pub async fn serve_with_working_directory(
232
+ options: Options,
233
+ working_directory: PathBuf,
234
+ host: &str,
235
+ port: u16,
236
+ ) -> Result<()> {
237
+ if let Some(warning) = store_warning(&options) {
238
+ eprintln!("{warning}");
239
+ }
240
+ let service = ServerService::new(working_directory, options);
241
+ service.initialize()?;
242
+ let app = create_router(service);
243
+ let address = server_bind_address(host, port)?;
244
+ let listener = tokio::net::TcpListener::bind(address)
245
+ .await
246
+ .with_context(|| format!("failed to bind server address {address}"))?;
247
+ println!("JSCPD server running on {}", server_display_url(host, port));
248
+ axum::serve(listener, app).await.context("server failed")
249
+ }
250
+
251
+ fn server_bind_address(host: &str, port: u16) -> Result<SocketAddr> {
252
+ let bind_host = if host == "true" { "0.0.0.0" } else { host };
253
+ (bind_host, port)
254
+ .to_socket_addrs()
255
+ .with_context(|| format!("failed to resolve server address {host}:{port}"))?
256
+ .next()
257
+ .with_context(|| format!("failed to resolve server address {host}:{port}"))
258
+ }
259
+
260
/// Formats the URL printed at startup; the host is shown verbatim.
fn server_display_url(host: &str, port: u16) -> String {
    let mut url = String::from("http://");
    url.push_str(host);
    url.push(':');
    url.push_str(&port.to_string());
    url
}
263
+
264
+ pub fn server_working_directory(options: &Options) -> PathBuf {
265
+ options
266
+ .paths
267
+ .first()
268
+ .cloned()
269
+ .unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
270
+ }
271
+
272
+ async fn api_info() -> Json<ApiInfoResponse> {
273
+ Json(ApiInfoResponse {
274
+ name: "jscpd-server",
275
+ version: env!("CARGO_PKG_VERSION"),
276
+ endpoints: [
277
+ ("POST /api/check", "Check code snippet for duplications"),
278
+ ("GET /api/stats", "Get overall project statistics"),
279
+ ("GET /api/health", "Server health check"),
280
+ ("POST /api/recheck", "Trigger recheck of the directory"),
281
+ ("POST /mcp", "MCP Protocol endpoint"),
282
+ ]
283
+ .into_iter()
284
+ .map(|(key, value)| (key.to_string(), value.to_string()))
285
+ .collect(),
286
+ documentation: "https://github.com/kucherenko/jscpd",
287
+ })
288
+ }
289
+
290
+ async fn check_snippet(
291
+ State(service): State<ServerService>,
292
+ headers: HeaderMap,
293
+ body: Bytes,
294
+ ) -> Response {
295
+ let request = match parse_check_payload(&headers, &body) {
296
+ Ok(request) => request,
297
+ Err(CheckPayloadError::Validation(message)) => {
298
+ return error_response("ValidationError", message, 400);
299
+ }
300
+ Err(CheckPayloadError::Syntax(message)) => {
301
+ return error_response("SyntaxError", message, 400);
302
+ }
303
+ };
304
+ match service.check_snippet(request) {
305
+ Ok(response) => Json(response).into_response(),
306
+ Err(error) => error_response("Error", error.to_string(), 400),
307
+ }
308
+ }
309
+
310
+ async fn recheck(State(service): State<ServerService>) -> Response {
311
+ match service.recheck() {
312
+ Ok(()) => Json(RecheckResponse {
313
+ message: "Recheck started",
314
+ })
315
+ .into_response(),
316
+ Err(error) => error_response("Error", error.to_string(), 400),
317
+ }
318
+ }
319
+
320
+ async fn stats(State(service): State<ServerService>) -> Response {
321
+ let response = service.statistics();
322
+ if response.statistics.is_none() {
323
+ return error_response(
324
+ "NotReady",
325
+ "Statistics not available yet. Server is still initializing.",
326
+ 503,
327
+ );
328
+ }
329
+ Json(response).into_response()
330
+ }
331
+
332
+ async fn health(State(service): State<ServerService>) -> Json<HealthResponse> {
333
+ Json(service.health())
334
+ }
335
+
336
+ async fn not_found(method: Method, uri: Uri) -> Response {
337
+ error_response(
338
+ "NotFound",
339
+ format!("Route {method} {} not found", uri.path()),
340
+ 404,
341
+ )
342
+ }
343
+
344
+ fn error_response(error: &str, message: impl Into<String>, status_code: u16) -> Response {
345
+ let status = StatusCode::from_u16(status_code).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR);
346
+ (
347
+ status,
348
+ Json(ErrorResponse {
349
+ error: error.to_string(),
350
+ message: message.into(),
351
+ status_code,
352
+ }),
353
+ )
354
+ .into_response()
355
+ }
356
+
357
+ fn parse_check_payload(
358
+ headers: &HeaderMap,
359
+ body: &[u8],
360
+ ) -> std::result::Result<CheckSnippetRequest, CheckPayloadError> {
361
+ let content_type = headers
362
+ .get(CONTENT_TYPE)
363
+ .and_then(|value| value.to_str().ok())
364
+ .unwrap_or_default()
365
+ .to_ascii_lowercase();
366
+ if content_type.starts_with("application/x-www-form-urlencoded") {
367
+ return parse_check_form(body).map_err(CheckPayloadError::Validation);
368
+ }
369
+ let payload = serde_json::from_slice(body)
370
+ .map_err(|error| CheckPayloadError::Syntax(json_syntax_error_message(body, &error)))?;
371
+ parse_check_request(payload).map_err(CheckPayloadError::Validation)
372
+ }
373
+
374
+ fn parse_check_form(body: &[u8]) -> std::result::Result<CheckSnippetRequest, String> {
375
+ let fields = form_urlencoded::parse(body)
376
+ .into_owned()
377
+ .collect::<Vec<_>>();
378
+ let code = required_form_field(&fields, "code")?;
379
+ if code.trim().is_empty() {
380
+ return Err(FIELD_CODE_EMPTY.to_string());
381
+ }
382
+ let format = required_form_field(&fields, "format")?;
383
+ if format.trim().is_empty() {
384
+ return Err(FIELD_FORMAT_EMPTY.to_string());
385
+ }
386
+ Ok(CheckSnippetRequest { code, format })
387
+ }
388
+
389
+ fn parse_check_request(payload: Value) -> std::result::Result<CheckSnippetRequest, String> {
390
+ let Some(object) = payload.as_object() else {
391
+ return Err("Request body must be an object".to_string());
392
+ };
393
+ let code = required_string_field(object, "code")?;
394
+ if code.trim().is_empty() {
395
+ return Err(FIELD_CODE_EMPTY.to_string());
396
+ }
397
+ let format = required_string_field(object, "format")?;
398
+ if format.trim().is_empty() {
399
+ return Err(FIELD_FORMAT_EMPTY.to_string());
400
+ }
401
+ Ok(CheckSnippetRequest { code, format })
402
+ }
403
+
404
+ fn required_string_field(
405
+ object: &serde_json::Map<String, Value>,
406
+ field: &str,
407
+ ) -> std::result::Result<String, String> {
408
+ let Some(value) = object.get(field) else {
409
+ return Err(format!("Missing required field: {field}"));
410
+ };
411
+ let Some(value) = value.as_str() else {
412
+ return Err(format!("Field \"{field}\" must be a string"));
413
+ };
414
+ Ok(value.to_string())
415
+ }
416
+
417
/// Returns a copy of the value of the first pair named `field`.
///
/// Errors with a client-facing message when no pair has that name.
fn required_form_field(
    fields: &[(String, String)],
    field: &str,
) -> std::result::Result<String, String> {
    for (name, value) in fields {
        if name == field {
            return Ok(value.clone());
        }
    }
    Err(format!("Missing required field: {field}"))
}
426
+
427
+ fn json_syntax_error_message(body: &[u8], error: &serde_json::Error) -> String {
428
+ let body = String::from_utf8_lossy(body);
429
+ let trimmed = body.trim_start();
430
+ if let Some(first) = trimmed.chars().next()
431
+ && !matches!(first, '{' | '[' | '"' | '-' | '0'..='9' | 't' | 'f' | 'n')
432
+ {
433
+ let preview = if trimmed.chars().count() > 20 {
434
+ format!("{}...", trimmed.chars().take(17).collect::<String>())
435
+ } else {
436
+ trimmed.to_string()
437
+ };
438
+ return format!("Unexpected token '{first}', \"{preview}\" is not valid JSON");
439
+ }
440
+ error.to_string()
441
+ }
442
+
443
+ fn duplication_statistics(
444
+ duplications: &[SnippetDuplication],
445
+ total_lines: usize,
446
+ ) -> DuplicationStatistics {
447
+ let mut duplicated = Vec::<usize>::new();
448
+ for duplication in duplications {
449
+ duplicated.extend(
450
+ duplication.snippet_location.start_line..=duplication.snippet_location.end_line,
451
+ );
452
+ }
453
+ duplicated.sort_unstable();
454
+ duplicated.dedup();
455
+ let duplicated_lines = duplicated.len();
456
+ DuplicationStatistics {
457
+ total_duplications: duplications.len(),
458
+ duplicated_lines,
459
+ total_lines,
460
+ percentage_duplicated: percentage(total_lines, duplicated_lines),
461
+ }
462
+ }
463
+
464
/// Returns `duplicated / total` as a percentage rounded to two decimals,
/// or `0.0` when `total` is zero.
fn percentage(total: usize, duplicated: usize) -> f64 {
    match total {
        0 => 0.0,
        _ => {
            // Scale to hundredths of a percent, round, then scale back.
            let hundredths = (duplicated as f64 * 10000.0) / total as f64;
            hundredths.round() / 100.0
        }
    }
}
471
+
472
/// Returns `path` relative to `working_directory`, or `path` unchanged when it
/// does not lie under that directory.
fn relative_source_id(path: &str, working_directory: &Path) -> String {
    if let Ok(relative) = Path::new(path).strip_prefix(working_directory) {
        if let Some(text) = relative.to_str() {
            return text.to_string();
        }
    }
    path.to_string()
}
481
+
482
+ fn slice_fragment(result: &DetectionResult, fragment: &Fragment) -> Option<String> {
483
+ result
484
+ .source_contents
485
+ .get(&fragment.source_id)
486
+ .and_then(|content| content.get(fragment.range[0]..fragment.range[1]))
487
+ .map(str::to_string)
488
+ }
489
+
490
+ fn fragment_line_count(fragment: &Fragment) -> usize {
491
+ fragment.end.line.saturating_sub(fragment.start.line) + 1
492
+ }
493
+
494
+ fn now_rfc3339() -> String {
495
+ OffsetDateTime::now_utc()
496
+ .format(&Rfc3339)
497
+ .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string())
498
+ }
499
+
500
+ fn new_mcp_session_id() -> String {
501
+ let mut bytes = [0u8; 16];
502
+ getrandom::getrandom(&mut bytes).expect("OS random unavailable for MCP session id");
503
+ bytes[6] = (bytes[6] & 0x0f) | 0x40;
504
+ bytes[8] = (bytes[8] & 0x3f) | 0x80;
505
+ let mut session_id = String::with_capacity(36);
506
+ for (index, byte) in bytes.iter().enumerate() {
507
+ if matches!(index, 4 | 6 | 8 | 10) {
508
+ session_id.push('-');
509
+ }
510
+ write!(&mut session_id, "{byte:02x}").expect("write to string");
511
+ }
512
+ session_id
513
+ }
514
+
515
// Error text returned by `recheck` and `check_snippet` while a scan is running.
+ const SCAN_IN_PROGRESS: &str = "Please wait for initial scan to complete";
516
// Error text returned by `check_snippet` before any scan has produced statistics.
+ const NOT_INITIALIZED: &str = "Server not initialized. Please wait for initial scan to complete.";
517
// Validation message for a `code` field that is empty after trimming.
+ const FIELD_CODE_EMPTY: &str = "Field \"code\" cannot be empty";
518
// Validation message for a `format` field that is empty after trimming.
+ const FIELD_FORMAT_EMPTY: &str = "Field \"format\" cannot be empty";
519
+
520
/// Why a `/api/check` body was rejected before reaching the service.
+ enum CheckPayloadError {
521
/// A field is missing, has the wrong type, or is blank; reported as `ValidationError` (400).
+ Validation(String),
522
/// The body is not valid JSON; reported as `SyntaxError` (400).
+ Syntax(String),
523
+ }
524
+
525
/// Input of a snippet check.
+ #[derive(Clone, Debug, Deserialize)]
526
+ pub struct CheckSnippetRequest {
527
/// Source text to check; rejected when empty after trimming.
+ pub code: String,
528
/// Format name handed to the detector as the snippet's `SourceFile::format`.
+ pub format: String,
529
+ }
530
+
531
/// Result of a snippet check; serialized with camelCase keys.
+ #[derive(Clone, Debug, Serialize)]
532
+ #[serde(rename_all = "camelCase")]
533
+ pub struct CheckSnippetResponse {
534
/// Clones that pair a region of the snippet with a region of the codebase.
+ pub duplications: Vec<SnippetDuplication>,
535
/// Line-based summary of `duplications`.
+ pub statistics: DuplicationStatistics,
536
+ }
537
+
538
/// One clone between the submitted snippet and a codebase file.
+ #[derive(Clone, Debug, Serialize)]
539
+ #[serde(rename_all = "camelCase")]
540
+ pub struct SnippetDuplication {
541
/// Position of the duplicated region inside the snippet.
+ pub snippet_location: SnippetLocation,
542
/// Position of the matching region inside the codebase.
+ pub codebase_location: DuplicationLocation,
543
/// Lines spanned by the snippet-side fragment (end line - start line + 1).
+ pub lines_count: usize,
544
+ }
545
+
546
/// Line/column span inside the snippet, copied from a detector `Fragment`.
+ #[derive(Clone, Debug, Serialize)]
547
+ #[serde(rename_all = "camelCase")]
548
+ pub struct SnippetLocation {
549
/// Copied from `fragment.start.line`.
+ pub start_line: usize,
550
/// Copied from `fragment.end.line`.
+ pub end_line: usize,
551
/// Copied from `fragment.start.column`.
+ pub start_column: usize,
552
/// Copied from `fragment.end.column`.
+ pub end_column: usize,
553
+ }
554
+
555
+ impl SnippetLocation {
556
+ fn from_fragment(fragment: &Fragment) -> Self {
557
+ Self {
558
+ start_line: fragment.start.line,
559
+ end_line: fragment.end.line,
560
+ start_column: fragment.start.column,
561
+ end_column: fragment.end.column,
562
+ }
563
+ }
564
+ }
565
+
566
/// Line/column span inside a codebase file, optionally with the matched text.
+ #[derive(Clone, Debug, Serialize)]
567
+ #[serde(rename_all = "camelCase")]
568
+ pub struct DuplicationLocation {
569
/// Source id made relative to the working directory when it lies under it.
+ pub file: String,
570
/// Copied from `fragment.start.line`.
+ pub start_line: usize,
571
/// Copied from `fragment.end.line`.
+ pub end_line: usize,
572
/// Copied from `fragment.start.column`.
+ pub start_column: usize,
573
/// Copied from `fragment.end.column`.
+ pub end_column: usize,
574
/// Text covered by the fragment's range; omitted from the JSON when unavailable.
+ #[serde(skip_serializing_if = "Option::is_none")]
575
+ pub fragment: Option<String>,
576
+ }
577
+
578
+ impl DuplicationLocation {
579
+ fn from_fragment(
580
+ fragment: &Fragment,
581
+ working_directory: &Path,
582
+ result: &DetectionResult,
583
+ ) -> Self {
584
+ Self {
585
+ file: relative_source_id(&fragment.source_id, working_directory),
586
+ start_line: fragment.start.line,
587
+ end_line: fragment.end.line,
588
+ start_column: fragment.start.column,
589
+ end_column: fragment.end.column,
590
+ fragment: slice_fragment(result, fragment),
591
+ }
592
+ }
593
+ }
594
+
595
/// Line-based summary of a snippet check; serialized with camelCase keys.
+ #[derive(Clone, Debug, Serialize)]
596
+ #[serde(rename_all = "camelCase")]
597
+ pub struct DuplicationStatistics {
598
/// Number of reported duplications.
+ pub total_duplications: usize,
599
/// Distinct snippet lines covered by at least one duplication.
+ pub duplicated_lines: usize,
600
/// Snippet line count, computed by splitting the code on `'\n'`.
+ pub total_lines: usize,
601
/// `duplicated_lines / total_lines` as a percentage rounded to two decimals (0 when there are no lines).
+ pub percentage_duplicated: f64,
602
+ }
603
+
604
/// Payload of `GET /api/stats`.
+ #[derive(Clone, Debug, Serialize)]
605
+ pub struct StatsResponse {
606
/// Statistics of the last successful scan; omitted from the JSON when no scan has completed.
+ #[serde(skip_serializing_if = "Option::is_none")]
607
+ pub statistics: Option<Statistics>,
608
/// Time of the last successful scan, or the current time when there has been none.
+ pub timestamp: String,
609
+ }
610
+
611
/// Payload of `GET /api/health`; serialized with camelCase keys.
+ #[derive(Clone, Debug, Serialize)]
612
+ #[serde(rename_all = "camelCase")]
613
+ pub struct HealthResponse {
614
/// `"initializing"` while a scan is running, otherwise `"ready"`.
+ pub status: &'static str,
615
/// Display form of the service's working directory.
+ pub working_directory: String,
616
/// Time of the last successful scan, if any.
+ pub last_scan_time: Option<String>,
617
+ }
618
+
619
/// Payload of `GET /`.
+ #[derive(Clone, Debug, Serialize)]
620
+ pub struct ApiInfoResponse {
621
/// Server name.
+ pub name: &'static str,
622
/// Crate version taken from `CARGO_PKG_VERSION` at build time.
+ pub version: &'static str,
623
/// Map from `"METHOD /path"` to a short description.
+ pub endpoints: std::collections::BTreeMap<String, String>,
624
/// Documentation URL.
+ pub documentation: &'static str,
625
+ }
626
+
627
/// JSON body of every error response; serialized with camelCase keys (`statusCode`).
+ #[derive(Clone, Debug, Serialize)]
628
+ #[serde(rename_all = "camelCase")]
629
+ pub struct ErrorResponse {
630
/// Short error kind such as `"ValidationError"`, `"SyntaxError"`, `"NotFound"`, `"NotReady"` or `"Error"`.
+ pub error: String,
631
/// Human-readable description.
+ pub message: String,
632
/// Status code passed to `error_response`.
+ pub status_code: u16,
633
+ }
634
+
635
/// Success payload of `POST /api/recheck`.
+ #[derive(Clone, Debug, Serialize)]
636
+ pub struct RecheckResponse {
637
/// Fixed confirmation text chosen by the handler.
+ pub message: &'static str,
638
+ }
639
+
640
+ #[cfg(test)]
641
+ mod tests {
642
+ use std::fs;
643
+ use std::time::{SystemTime, UNIX_EPOCH};
644
+
645
+ use axum::body::{Body, to_bytes};
646
+ use axum::http::header::CONTENT_TYPE;
647
+ use axum::http::{Request, StatusCode};
648
+ use serde_json::Value;
649
+ use tower::ServiceExt;
650
+
651
+ use crate::cli::Options;
652
+
653
+ use super::*;
654
+
655
+ fn fixture_project() -> PathBuf {
656
+ let mut path = std::env::temp_dir();
657
+ let stamp = SystemTime::now()
658
+ .duration_since(UNIX_EPOCH)
659
+ .expect("time")
660
+ .as_nanos();
661
+ path.push(format!("jscpd-rs-server-{stamp}"));
662
+ fs::create_dir_all(&path).expect("create temp project");
663
+ let content = "const alpha = 1;\nconst beta = 2;\nconst gamma = alpha + beta;\n";
664
+ fs::write(path.join("a.js"), content).expect("write a.js");
665
+ fs::write(path.join("b.js"), content).expect("write b.js");
666
+ path
667
+ }
668
+
669
+ fn service_for(path: &Path) -> ServerService {
670
+ let options = Options {
671
+ paths: vec![path.to_path_buf()],
672
+ min_tokens: 5,
673
+ min_lines: 2,
674
+ max_size_bytes: 1024 * 1024,
675
+ ..Options::default()
676
+ };
677
+ ServerService::new(path.to_path_buf(), options)
678
+ }
679
+
680
+ #[test]
681
+ fn server_initialization_populates_stats_and_health() {
682
+ let path = fixture_project();
683
+ let service = service_for(&path);
684
+
685
+ service.initialize().expect("initialize");
686
+
687
+ let stats = service.statistics();
688
+ assert!(stats.statistics.is_some());
689
+ assert!(stats.timestamp.contains('T'));
690
+ let health = service.health();
691
+ assert_eq!(health.status, "ready");
692
+ assert_eq!(health.working_directory, path.display().to_string());
693
+ assert!(health.last_scan_time.is_some());
694
+ fs::remove_dir_all(path).ok();
695
+ }
696
+
697
+ #[test]
698
+ fn server_host_binding_preserves_upstream_display_host() {
699
+ let true_addr = server_bind_address("true", 3000).expect("true host bind");
700
+ assert_eq!(true_addr.ip().to_string(), "0.0.0.0");
701
+ assert_eq!(true_addr.port(), 3000);
702
+ assert_eq!(server_display_url("true", 3000), "http://true:3000");
703
+ assert_eq!(
704
+ server_display_url("localhost", 3001),
705
+ "http://localhost:3001"
706
+ );
707
+ }
708
+
709
+ #[test]
710
+ fn server_check_snippet_reports_codebase_duplications() {
711
+ let path = fixture_project();
712
+ let service = service_for(&path);
713
+ service.initialize().expect("initialize");
714
+
715
+ let response = service
716
+ .check_snippet(CheckSnippetRequest {
717
+ code: "const alpha = 1;\nconst beta = 2;\nconst gamma = alpha + beta;\n"
718
+ .to_string(),
719
+ format: "javascript".to_string(),
720
+ })
721
+ .expect("check snippet");
722
+
723
+ assert!(!response.duplications.is_empty());
724
+ assert_eq!(
725
+ response.statistics.total_duplications,
726
+ response.duplications.len()
727
+ );
728
+ assert!(response.statistics.duplicated_lines > 0);
729
+ assert!(
730
+ response
731
+ .duplications
732
+ .iter()
733
+ .all(|duplication| !duplication.codebase_location.file.starts_with("<snippet>"))
734
+ );
735
+ fs::remove_dir_all(path).ok();
736
+ }
737
+
738
+ #[test]
739
+ fn server_check_snippet_rejects_empty_code() {
740
+ let path = fixture_project();
741
+ let service = service_for(&path);
742
+ service.initialize().expect("initialize");
743
+
744
+ let error = service
745
+ .check_snippet(CheckSnippetRequest {
746
+ code: " ".to_string(),
747
+ format: "javascript".to_string(),
748
+ })
749
+ .expect_err("empty code should fail");
750
+
751
+ assert_eq!(error.to_string(), FIELD_CODE_EMPTY);
752
+ fs::remove_dir_all(path).ok();
753
+ }
754
+
755
+ #[test]
756
+ fn server_recheck_refreshes_statistics() {
757
+ let path = fixture_project();
758
+ let service = service_for(&path);
759
+ service.initialize().expect("initialize");
760
+ let before = service
761
+ .statistics()
762
+ .statistics
763
+ .expect("stats before")
764
+ .total
765
+ .sources;
766
+ fs::write(path.join("c.js"), "const unique = 1;\n").expect("write c.js");
767
+
768
+ service.recheck().expect("recheck");
769
+
770
+ let after = service
771
+ .statistics()
772
+ .statistics
773
+ .expect("stats after")
774
+ .total
775
+ .sources;
776
+ assert!(after > before);
777
+ fs::remove_dir_all(path).ok();
778
+ }
779
+
780
+ #[test]
781
+ fn server_scan_uses_working_directory_over_config_paths_like_upstream() {
782
+ let working = fixture_project();
783
+ let configured = fixture_project();
784
+ fs::write(configured.join("c.js"), "const configured = 1;\n").expect("write c.js");
785
+ let options = Options {
786
+ paths: vec![configured.clone()],
787
+ min_tokens: 5,
788
+ min_lines: 2,
789
+ max_size_bytes: 1024 * 1024,
790
+ ..Options::default()
791
+ };
792
+ let service = ServerService::new(working.clone(), options);
793
+
794
+ service.initialize().expect("initialize");
795
+
796
+ let stats = service.statistics().statistics.expect("statistics");
797
+ assert_eq!(stats.total.sources, 2);
798
+ fs::remove_dir_all(working).ok();
799
+ fs::remove_dir_all(configured).ok();
800
+ }
801
+
802
+ #[tokio::test]
803
+ async fn server_check_snippet_accepts_form_urlencoded_body() {
804
+ let path = fixture_project();
805
+ let service = service_for(&path);
806
+ service.initialize().expect("initialize");
807
+ let app = create_router(service);
808
+ let body = form_urlencoded::Serializer::new(String::new())
809
+ .append_pair(
810
+ "code",
811
+ "const alpha = 1;\nconst beta = 2;\nconst gamma = alpha + beta;\n",
812
+ )
813
+ .append_pair("format", "javascript")
814
+ .finish();
815
+
816
+ let response = app
817
+ .oneshot(
818
+ Request::builder()
819
+ .method("POST")
820
+ .uri("/api/check")
821
+ .header(CONTENT_TYPE, "application/x-www-form-urlencoded")
822
+ .body(Body::from(body))
823
+ .expect("request"),
824
+ )
825
+ .await
826
+ .expect("response");
827
+
828
+ assert_eq!(response.status(), StatusCode::OK);
829
+ let body = to_bytes(response.into_body(), usize::MAX)
830
+ .await
831
+ .expect("body");
832
+ let body: Value = serde_json::from_slice(&body).expect("json body");
833
+ assert!(body["duplications"].is_array());
834
+ assert_eq!(
835
+ body["statistics"]["totalDuplications"].as_u64(),
836
+ body["duplications"]
837
+ .as_array()
838
+ .map(|items| items.len() as u64)
839
+ );
840
+ fs::remove_dir_all(path).ok();
841
+ }
842
+
843
+ #[tokio::test]
844
+ async fn server_check_snippet_invalid_json_matches_upstream_error() {
845
+ let path = fixture_project();
846
+ let service = service_for(&path);
847
+ service.initialize().expect("initialize");
848
+ let app = create_router(service);
849
+
850
+ let response = app
851
+ .oneshot(
852
+ Request::builder()
853
+ .method("POST")
854
+ .uri("/api/check")
855
+ .header(CONTENT_TYPE, "application/json")
856
+ .body(Body::from("invalid-json"))
857
+ .expect("request"),
858
+ )
859
+ .await
860
+ .expect("response");
861
+
862
+ assert_eq!(response.status(), StatusCode::BAD_REQUEST);
863
+ let body = to_bytes(response.into_body(), usize::MAX)
864
+ .await
865
+ .expect("body");
866
+ let body: Value = serde_json::from_slice(&body).expect("json body");
867
+ assert_eq!(body["error"], "SyntaxError");
868
+ assert_eq!(
869
+ body["message"],
870
+ "Unexpected token 'i', \"invalid-json\" is not valid JSON"
871
+ );
872
+ assert_eq!(body["statusCode"], 400);
873
+ fs::remove_dir_all(path).ok();
874
+ }
875
+
876
+ #[tokio::test]
877
+ async fn server_check_snippet_rejects_non_string_format_like_upstream() {
878
+ let path = fixture_project();
879
+ let service = service_for(&path);
880
+ service.initialize().expect("initialize");
881
+ let app = create_router(service);
882
+
883
+ let response = app
884
+ .oneshot(
885
+ Request::builder()
886
+ .method("POST")
887
+ .uri("/api/check")
888
+ .header(CONTENT_TYPE, "application/json")
889
+ .body(Body::from(r#"{"code":"console.log(1);","format":123}"#))
890
+ .expect("request"),
891
+ )
892
+ .await
893
+ .expect("response");
894
+
895
+ assert_eq!(response.status(), StatusCode::BAD_REQUEST);
896
+ let body = to_bytes(response.into_body(), usize::MAX)
897
+ .await
898
+ .expect("body");
899
+ let body: Value = serde_json::from_slice(&body).expect("json body");
900
+ assert_eq!(body["error"], "ValidationError");
901
+ assert_eq!(body["message"], "Field \"format\" must be a string");
902
+ assert_eq!(body["statusCode"], 400);
903
+ fs::remove_dir_all(path).ok();
904
+ }
905
+
906
+ #[tokio::test]
907
+ async fn server_uninitialized_api_matches_upstream_error_shapes() {
908
+ let path = fixture_project();
909
+ let service = service_for(&path);
910
+ let app = create_router(service);
911
+
912
+ let check_response = app
913
+ .clone()
914
+ .oneshot(
915
+ Request::builder()
916
+ .method("POST")
917
+ .uri("/api/check")
918
+ .header(CONTENT_TYPE, "application/json")
919
+ .body(Body::from(
920
+ r#"{"code":"console.log(\"test\");","format":"javascript"}"#,
921
+ ))
922
+ .expect("request"),
923
+ )
924
+ .await
925
+ .expect("response");
926
+
927
+ assert_eq!(check_response.status(), StatusCode::BAD_REQUEST);
928
+ let body = to_bytes(check_response.into_body(), usize::MAX)
929
+ .await
930
+ .expect("body");
931
+ let body: Value = serde_json::from_slice(&body).expect("json body");
932
+ assert_eq!(body["error"], "Error");
933
+ assert_eq!(body["message"], NOT_INITIALIZED);
934
+ assert_eq!(body["statusCode"], 400);
935
+
936
+ let stats_response = app
937
+ .clone()
938
+ .oneshot(
939
+ Request::builder()
940
+ .method("GET")
941
+ .uri("/api/stats")
942
+ .body(Body::empty())
943
+ .expect("request"),
944
+ )
945
+ .await
946
+ .expect("response");
947
+
948
+ assert_eq!(stats_response.status(), StatusCode::SERVICE_UNAVAILABLE);
949
+ let body = to_bytes(stats_response.into_body(), usize::MAX)
950
+ .await
951
+ .expect("body");
952
+ let body: Value = serde_json::from_slice(&body).expect("json body");
953
+ assert_eq!(body["error"], "NotReady");
954
+ assert_eq!(
955
+ body["message"],
956
+ "Statistics not available yet. Server is still initializing."
957
+ );
958
+ assert_eq!(body["statusCode"], 503);
959
+
960
+ let health_response = app
961
+ .oneshot(
962
+ Request::builder()
963
+ .method("GET")
964
+ .uri("/api/health")
965
+ .body(Body::empty())
966
+ .expect("request"),
967
+ )
968
+ .await
969
+ .expect("response");
970
+
971
+ assert_eq!(health_response.status(), StatusCode::OK);
972
+ let body = to_bytes(health_response.into_body(), usize::MAX)
973
+ .await
974
+ .expect("body");
975
+ let body: Value = serde_json::from_slice(&body).expect("json body");
976
+ assert!(matches!(
977
+ body["status"].as_str(),
978
+ Some("ready" | "initializing")
979
+ ));
980
+ assert_eq!(body["workingDirectory"], path.display().to_string());
981
+ assert_eq!(body["lastScanTime"], Value::Null);
982
+ fs::remove_dir_all(path).ok();
983
+ }
984
+
985
+ #[tokio::test]
986
+ async fn server_unknown_routes_return_upstream_style_json_error() {
987
+ let path = fixture_project();
988
+ let service = service_for(&path);
989
+ let app = create_router(service);
990
+
991
+ let response = app
992
+ .oneshot(
993
+ Request::builder()
994
+ .method("GET")
995
+ .uri("/api/unknown?ignored=true")
996
+ .body(Body::empty())
997
+ .expect("request"),
998
+ )
999
+ .await
1000
+ .expect("response");
1001
+
1002
+ assert_eq!(response.status(), StatusCode::NOT_FOUND);
1003
+ let body = to_bytes(response.into_body(), usize::MAX)
1004
+ .await
1005
+ .expect("body");
1006
+ let body: Value = serde_json::from_slice(&body).expect("json body");
1007
+ assert_eq!(body["error"], "NotFound");
1008
+ assert_eq!(body["message"], "Route GET /api/unknown not found");
1009
+ assert_eq!(body["statusCode"], 404);
1010
+ fs::remove_dir_all(path).ok();
1011
+ }
1012
+
1013
+ #[tokio::test]
1014
+ async fn server_wrong_api_methods_return_upstream_style_not_found() {
1015
+ let path = fixture_project();
1016
+ let service = service_for(&path);
1017
+ let app = create_router(service);
1018
+
1019
+ for (method, uri) in [
1020
+ ("GET", "/api/check"),
1021
+ ("GET", "/api/recheck"),
1022
+ ("POST", "/api/stats"),
1023
+ ("POST", "/api/health"),
1024
+ ("PUT", "/api/check"),
1025
+ ("DELETE", "/api/stats"),
1026
+ ] {
1027
+ let response = app
1028
+ .clone()
1029
+ .oneshot(
1030
+ Request::builder()
1031
+ .method(method)
1032
+ .uri(uri)
1033
+ .body(Body::empty())
1034
+ .expect("request"),
1035
+ )
1036
+ .await
1037
+ .expect("response");
1038
+
1039
+ assert_eq!(response.status(), StatusCode::NOT_FOUND);
1040
+ let body = to_bytes(response.into_body(), usize::MAX)
1041
+ .await
1042
+ .expect("body");
1043
+ let body: Value = serde_json::from_slice(&body).expect("json body");
1044
+ assert_eq!(body["error"], "NotFound");
1045
+ assert_eq!(body["message"], format!("Route {method} {uri} not found"));
1046
+ assert_eq!(body["statusCode"], 404);
1047
+ }
1048
+ fs::remove_dir_all(path).ok();
1049
+ }
1050
+
1051
+ #[tokio::test]
1052
+ async fn server_unsupported_mcp_methods_return_upstream_style_not_found() {
1053
+ let path = fixture_project();
1054
+ let service = service_for(&path);
1055
+ let app = create_router(service);
1056
+
1057
+ for method in ["DELETE", "OPTIONS"] {
1058
+ let response = app
1059
+ .clone()
1060
+ .oneshot(
1061
+ Request::builder()
1062
+ .method(method)
1063
+ .uri("/mcp")
1064
+ .body(Body::empty())
1065
+ .expect("request"),
1066
+ )
1067
+ .await
1068
+ .expect("response");
1069
+
1070
+ assert_eq!(response.status(), StatusCode::NOT_FOUND);
1071
+ let body = to_bytes(response.into_body(), usize::MAX)
1072
+ .await
1073
+ .expect("body");
1074
+ let body: Value = serde_json::from_slice(&body).expect("json body");
1075
+ assert_eq!(body["error"], "NotFound");
1076
+ assert_eq!(body["message"], format!("Route {method} /mcp not found"));
1077
+ assert_eq!(body["statusCode"], 404);
1078
+ }
1079
+ fs::remove_dir_all(path).ok();
1080
+ }
1081
+ }