npm - jscpd-rs - Versions diffs - 0.1.0 - Mend

jscpd-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

package/CHANGELOG.md +69 -0
package/Cargo.lock +1323 -0
package/Cargo.toml +54 -0
package/LICENSE +21 -0
package/README.md +372 -0
package/docs/api-parity.md +49 -0
package/docs/cloning-plan.md +281 -0
package/docs/compat-baseline.md +535 -0
package/docs/format-porting.md +86 -0
package/docs/junior-task-template.md +62 -0
package/docs/junior-workflow.md +87 -0
package/docs/migrating-from-jscpd.md +193 -0
package/docs/npm-release.md +116 -0
package/docs/public-benchmark-suite.md +81 -0
package/docs/release-checklist.md +200 -0
package/docs/release-decisions.md +103 -0
package/docs/release-readiness.md +51 -0
package/docs/upstream-bugs.md +501 -0
package/docs/upstream-issue-drafts.md +393 -0
package/docs/user-guide.md +309 -0
package/examples/dump_oxc_tokens.rs +112 -0
package/examples/library_api.rs +42 -0
package/npm/bin/jscpd-rs.js +6 -0
package/npm/bin/jscpd-server.js +6 -0
package/npm/lib/run-binary.js +68 -0
package/npm/scripts/postinstall.js +50 -0
package/package.json +53 -0
package/skills/dry-refactoring/SKILL.md +63 -0
package/skills/jscpd/SKILL.md +85 -0
package/src/app.rs +512 -0
package/src/bin/jscpd-server.rs +429 -0
package/src/blame.rs +130 -0
package/src/cli/config.rs +543 -0
package/src/cli/parsing.rs +301 -0
package/src/cli/tests.rs +543 -0
package/src/cli.rs +671 -0
package/src/detector/matching/secondary.rs +387 -0
package/src/detector/matching.rs +274 -0
package/src/detector/model.rs +190 -0
package/src/detector/prepare.rs +71 -0
package/src/detector/skip_local.rs +40 -0
package/src/detector/statistics.rs +138 -0
package/src/detector/store.rs +96 -0
package/src/detector/tests.rs +238 -0
package/src/detector.rs +265 -0
package/src/files/discovery.rs +508 -0
package/src/files/gitignore.rs +203 -0
package/src/files/paths.rs +68 -0
package/src/files/shebang.rs +106 -0
package/src/files/tests.rs +523 -0
package/src/files.rs +25 -0
package/src/formats.rs +570 -0
package/src/lib.rs +433 -0
package/src/main.rs +26 -0
package/src/report/ai.rs +125 -0
package/src/report/badge.rs +238 -0
package/src/report/console.rs +180 -0
package/src/report/console_common.rs +37 -0
package/src/report/console_full.rs +139 -0
package/src/report/csv.rs +65 -0
package/src/report/escape.rs +8 -0
package/src/report/file_output.rs +28 -0
package/src/report/html/assets.rs +47 -0
package/src/report/html.rs +336 -0
package/src/report/json.rs +119 -0
package/src/report/markdown.rs +125 -0
package/src/report/sarif.rs +302 -0
package/src/report/silent.rs +22 -0
package/src/report/source.rs +38 -0
package/src/report/summary.rs +50 -0
package/src/report/test_support.rs +133 -0
package/src/report/threshold.rs +76 -0
package/src/report/xcode.rs +90 -0
package/src/report/xml.rs +119 -0
package/src/report.rs +250 -0
package/src/server/mcp.rs +942 -0
package/src/server.rs +1081 -0
package/src/tokenizer/apex.rs +97 -0
package/src/tokenizer/blocks.rs +532 -0
package/src/tokenizer/embedded.rs +106 -0
package/src/tokenizer/generic.rs +511 -0
package/src/tokenizer/hash.rs +27 -0
package/src/tokenizer/ignore.rs +33 -0
package/src/tokenizer/line_index.rs +33 -0
package/src/tokenizer/markdown.rs +289 -0
package/src/tokenizer/markup_attrs.rs +289 -0
package/src/tokenizer/oxc/fallback.rs +275 -0
package/src/tokenizer/oxc/jsx.rs +168 -0
package/src/tokenizer/oxc/kind.rs +177 -0
package/src/tokenizer/oxc/lexical.rs +67 -0
package/src/tokenizer/oxc.rs +659 -0
package/src/tokenizer/scan.rs +88 -0
package/src/tokenizer/tap.rs +150 -0
package/src/tokenizer/tests.rs +915 -0
package/src/tokenizer.rs +328 -0
package/src/verbose.rs +195 -0

package/src/detector/model.rs ADDED Viewed

@@ -0,0 +1,190 @@
+use std::collections::{BTreeMap, HashMap};
+use serde::Serialize;
+use crate::tokenizer::Location;
+/// Git blame lines keyed by line number.
+///
+/// Keys are `String`s held in a `BTreeMap`, so iteration follows
+/// lexicographic key order (`"10"` sorts before `"2"`), not numeric order.
+pub type BlamedLines = BTreeMap<String, BlamedLine>;
+/// Git blame information for one duplicated source line.
+///
+/// All fields are plain text and are serialized under their Rust field names.
+#[derive(Clone, Debug, Serialize)]
+pub struct BlamedLine {
+    /// Commit revision.
+    pub rev: String,
+    /// Author name reported by Git.
+    pub author: String,
+    /// Author or commit date reported by Git.
+    pub date: String,
+    /// Source line text.
+    pub line: String,
+}
+/// Numeric handle identifying one source inside the detector.
+///
+/// NOTE(review): presumably an index into the detector's list of prepared
+/// sources — confirm against the code that constructs it.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub(super) struct SourceId(pub(super) usize);
+/// Numeric handle identifying one format name inside the detector.
+///
+/// `assign_formats` creates these as indices into the name list it returns.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub(super) struct FormatId(pub(super) usize);
+/// One side of a clone: a duplicated region inside a single source.
+#[derive(Clone, Debug, Serialize)]
+pub struct Fragment {
+    /// Identifier of the source, usually its path; serialized as `sourceId`.
+    #[serde(rename = "sourceId")]
+    pub source_id: String,
+    /// Location where the duplicated region begins.
+    pub start: Location,
+    /// Location where the duplicated region ends.
+    pub end: Location,
+    /// Byte range of the duplicated region, stored as a two-element array.
+    pub range: [usize; 2],
+    /// Git blame data keyed by line number; left out of the serialized
+    /// output entirely when it is `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub blame: Option<BlamedLines>,
+}
+/// A reported clone: two fragments of the same format that duplicate each
+/// other.
+#[derive(Clone, Debug, Serialize)]
+pub struct CloneMatch {
+    /// Name of the format both fragments belong to.
+    pub format: String,
+    /// First fragment of the pair; serialized as `duplicationA`.
+    #[serde(rename = "duplicationA")]
+    pub duplication_a: Fragment,
+    /// Second fragment of the pair; serialized as `duplicationB`.
+    #[serde(rename = "duplicationB")]
+    pub duplication_b: Fragment,
+    /// Number of detection tokens covered by the clone.
+    pub tokens: usize,
+}
+/// Clone skipped from final output with compatibility/debug messages.
+///
+/// Unlike the report types in this module, this type does not derive
+/// `Serialize`.
+#[derive(Clone, Debug)]
+pub struct SkippedClone {
+    /// Skipped clone candidate.
+    pub clone: CloneMatch,
+    /// Reason messages explaining why the clone was skipped.
+    pub message: Vec<String>,
+}
+/// One row of duplication counters, used for a single source, a whole
+/// format, or the entire run.
+///
+/// `Default` yields a row with every counter at zero.
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct StatisticRow {
+    /// Total number of lines.
+    pub lines: usize,
+    /// Total number of tokens.
+    pub tokens: usize,
+    /// How many sources this row accounts for.
+    pub sources: usize,
+    /// How many clone pairs were counted into this row.
+    pub clones: usize,
+    /// Duplicated line total for this row; serialized as `duplicatedLines`.
+    #[serde(rename = "duplicatedLines")]
+    pub duplicated_lines: usize,
+    /// Duplicated token total for this row; serialized as
+    /// `duplicatedTokens`.
+    #[serde(rename = "duplicatedTokens")]
+    pub duplicated_tokens: usize,
+    /// Share of duplicated lines, as a percentage.
+    pub percentage: f64,
+    /// Share of duplicated tokens, as a percentage; serialized as
+    /// `percentageTokens`.
+    #[serde(rename = "percentageTokens")]
+    pub percentage_tokens: f64,
+    /// New duplicated line count, kept for the upstream report shape;
+    /// serialized as `newDuplicatedLines`.
+    #[serde(rename = "newDuplicatedLines")]
+    pub new_duplicated_lines: usize,
+    /// New clone count, kept for the upstream report shape; serialized as
+    /// `newClones`.
+    #[serde(rename = "newClones")]
+    pub new_clones: usize,
+}
+/// Duplication statistics grouped by format.
+///
+/// `sources` is a `HashMap`, so its iteration order is unspecified.
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct FormatStatistic {
+    /// Per-source statistics for this format, keyed by source identifier.
+    pub sources: HashMap<String, StatisticRow>,
+    /// Total statistics for this format.
+    pub total: StatisticRow,
+}
+/// Duplication statistics for a full detection run.
+///
+/// `formats` is a `HashMap`, so its iteration order is unspecified.
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct Statistics {
+    /// Total statistics across all formats.
+    pub total: StatisticRow,
+    /// Statistics grouped by format name.
+    pub formats: HashMap<String, FormatStatistic>,
+}
+/// Summary of one analyzed source.
+///
+/// Serialized under the Rust field names (`path`, `format`, `lines`,
+/// `tokens`).
+#[derive(Clone, Debug, Serialize)]
+pub struct SourceSummary {
+    /// Source path or identifier.
+    pub path: String,
+    /// Detected or assigned format.
+    pub format: String,
+    /// Source line count.
+    pub lines: usize,
+    /// Detection token count.
+    pub tokens: usize,
+}
+/// Everything a detection run produces.
+///
+/// Only `clones`, `statistics` and `sources` are serialized; the remaining
+/// fields are marked `#[serde(skip)]`.
+#[derive(Clone, Debug, Serialize)]
+pub struct DetectionResult {
+    /// Clone pairs that made it into the report.
+    pub clones: Vec<CloneMatch>,
+    /// Clone candidates that were dropped from the final reports.
+    #[serde(skip)]
+    pub skipped_clones: Vec<SkippedClone>,
+    /// Aggregated duplication statistics.
+    pub statistics: Statistics,
+    /// One summary per analyzed source.
+    pub sources: Vec<SourceSummary>,
+    /// Source text keyed by source identifier, for reporters that need to
+    /// print fragments.
+    #[serde(skip)]
+    pub source_contents: HashMap<String, String>,
+}
+/// Position data of one detection token, without its hash.
+///
+/// `split_tokens` fills these fields from the matching `DetectionToken`
+/// fields.
+#[derive(Clone, Debug)]
+pub(super) struct TokenSpan {
+    /// Location where the token starts.
+    pub(super) start: Location,
+    /// Location where the token ends.
+    pub(super) end: Location,
+    /// Two-element range copied from the token.
+    pub(super) range: [usize; 2],
+}
+/// Descriptive data kept for one prepared source.
+#[derive(Clone, Debug)]
+pub(super) struct SourceMeta {
+    /// Identifier of the source file this entry came from.
+    pub(super) source_id: String,
+    /// Format name reported by the tokenizer for this token map.
+    pub(super) format: String,
+    /// Owned copy of the source file's content.
+    pub(super) content: String,
+    /// Line statistic as computed by `token_stream_statistics`.
+    pub(super) lines: usize,
+    /// Token statistic as computed by `token_stream_statistics`.
+    pub(super) tokens: usize,
+}
+/// Token hashes and spans of one source, tagged with numeric ids.
+///
+/// NOTE(review): `hashes` and `spans` are presumably parallel (one entry per
+/// token each), as in `PreparedSourceDraft` — confirm at the construction
+/// site.
+#[derive(Clone, Debug)]
+pub(super) struct TokenStream {
+    /// Handle of the source the tokens belong to.
+    pub(super) source_id: SourceId,
+    /// Handle of the source's format.
+    pub(super) format_id: FormatId,
+    /// Hash of each detection token.
+    pub(super) hashes: Vec<u64>,
+    /// Span of each detection token.
+    pub(super) spans: Vec<TokenSpan>,
+}
+/// A position inside one source's token stream.
+///
+/// NOTE(review): presumably records where a hashed token window was seen —
+/// confirm against the matching code.
+#[derive(Clone, Copy, Debug)]
+pub(super) struct Occurrence {
+    /// Handle of the source.
+    pub(super) source_id: SourceId,
+    /// Token index at which the occurrence starts.
+    pub(super) token_start: usize,
+}
+/// A source ready for matching: its metadata plus its id-tagged token
+/// stream.
+#[derive(Clone, Debug)]
+pub(super) struct PreparedSource {
+    /// Descriptive data for the source.
+    pub(super) meta: SourceMeta,
+    /// Token hashes and spans for the source.
+    pub(super) stream: TokenStream,
+}
+/// A tokenized source that has not yet been given numeric ids.
+///
+/// Built by `prepare_file_maps`; `hashes[i]` and `spans[i]` come from the
+/// same token.
+#[derive(Clone, Debug)]
+pub(crate) struct PreparedSourceDraft {
+    /// Descriptive data for the source.
+    pub(super) meta: SourceMeta,
+    /// Hash of each detection token.
+    pub(super) hashes: Vec<u64>,
+    /// Span of each detection token.
+    pub(super) spans: Vec<TokenSpan>,
+}

package/src/detector/prepare.rs ADDED Viewed

@@ -0,0 +1,71 @@
+use rustc_hash::FxHashMap;
+use crate::cli::Options;
+use crate::files::SourceFile;
+use crate::tokenizer::{DetectionToken, tokenize_maps_for_detection};
+use super::model::{FormatId, PreparedSourceDraft, SourceMeta, TokenSpan};
+/// Gives every draft a [`FormatId`], numbering formats in order of first
+/// appearance.
+///
+/// Returns the ids (one per entry of `files`, in the same order) and the
+/// distinct format names, where `names[id.0]` is the name behind `id`.
+pub(super) fn assign_formats(files: &[PreparedSourceDraft]) -> (Vec<FormatId>, Vec<String>) {
+    let mut known = FxHashMap::default();
+    let mut names = Vec::new();
+    let mut ids = Vec::with_capacity(files.len());
+    for file in files {
+        let format = &file.meta.format;
+        let id = match known.get(format) {
+            Some(existing) => *existing,
+            None => {
+                // First time this format is seen: its id is its slot in `names`.
+                let fresh = FormatId(names.len());
+                known.insert(format.clone(), fresh);
+                names.push(format.clone());
+                fresh
+            }
+        };
+        ids.push(id);
+    }
+    (ids, names)
+}
+/// Tokenizes `file` and turns every token map the tokenizer returns into a
+/// [`PreparedSourceDraft`].
+///
+/// Each draft carries its own copy of the file's source id and content, the
+/// format reported for that map, and the statistics derived from the map's
+/// token spans.
+pub(super) fn prepare_file_maps(file: SourceFile, options: &Options) -> Vec<PreparedSourceDraft> {
+    let mut drafts = Vec::new();
+    for map in tokenize_maps_for_detection(&file.content, &file.format, options) {
+        let (hashes, spans) = split_tokens(map.tokens);
+        let (lines, tokens) = token_stream_statistics(&spans);
+        let meta = SourceMeta {
+            source_id: file.source_id.clone(),
+            format: map.format,
+            content: file.content.clone(),
+            lines,
+            tokens,
+        };
+        drafts.push(PreparedSourceDraft {
+            meta,
+            hashes,
+            spans,
+        });
+    }
+    drafts
+}
+/// Splits detection tokens into two parallel vectors: the token hashes and
+/// the token spans, both in the original token order.
+fn split_tokens(tokens: Vec<DetectionToken>) -> (Vec<u64>, Vec<TokenSpan>) {
+    tokens
+        .into_iter()
+        .map(|token| {
+            let hash = token.hash;
+            let span = TokenSpan {
+                start: token.start,
+                end: token.end,
+                range: token.range,
+            };
+            (hash, span)
+        })
+        .unzip()
+}
+/// Computes the `(lines, tokens)` statistics for a token stream.
+///
+/// Both values are differences between the end of the last span and the
+/// start of the first span: the first in `line`, the second in `position`.
+/// Each difference saturates at zero, and an empty stream yields `(0, 0)`.
+fn token_stream_statistics(spans: &[TokenSpan]) -> (usize, usize) {
+    let (Some(first), Some(last)) = (spans.first(), spans.last()) else {
+        return (0, 0);
+    };
+    let line_span = last.end.line.saturating_sub(first.start.line);
+    let position_span = last.end.position.saturating_sub(first.start.position);
+    (line_span, position_span)
+}

package/src/detector/skip_local.rs ADDED Viewed

@@ -0,0 +1,40 @@
+use std::path::{Path, PathBuf};
+use crate::cli::Options;
+/// Reports whether `a` and `b` both lie strictly beneath the same entry of
+/// `options.paths`.
+///
+/// Relative paths are resolved against the current working directory, or
+/// against `"."` when the working directory cannot be determined. The
+/// comparison is lexical, on normalized path components.
+pub(super) fn same_configured_root(a: &str, b: &str, options: &Options) -> bool {
+    let cwd = match std::env::current_dir() {
+        Ok(dir) => dir,
+        Err(_) => PathBuf::from("."),
+    };
+    let first = normalize_for_prefix(Path::new(a), &cwd);
+    let second = normalize_for_prefix(Path::new(b), &cwd);
+    for root in options.paths.iter() {
+        let root = normalize_for_prefix(root, &cwd);
+        if is_under_root(&first, &root) && is_under_root(&second, &root) {
+            return true;
+        }
+    }
+    false
+}
+/// Returns `true` when `path` starts with every component of `root` and has
+/// at least one further component, so a path equal to `root` does not
+/// count.
+fn is_under_root(path: &[PathBuf], root: &[PathBuf]) -> bool {
+    path.strip_prefix(root)
+        .is_some_and(|rest| !rest.is_empty())
+}
+/// Turns `path` into a list of its normal components, for prefix
+/// comparison.
+///
+/// A non-absolute `path` is first joined onto `cwd`. `.` components are
+/// dropped and each `..` removes the previously collected component, if any.
+/// Root and prefix components are discarded. The filesystem is never
+/// consulted.
+fn normalize_for_prefix(path: &Path, cwd: &Path) -> Vec<PathBuf> {
+    use std::path::Component;
+    let joined;
+    let absolute: &Path = if path.is_absolute() {
+        path
+    } else {
+        joined = cwd.join(path);
+        &joined
+    };
+    absolute
+        .components()
+        .fold(Vec::new(), |mut segments, component| {
+            match component {
+                Component::Normal(name) => segments.push(PathBuf::from(name)),
+                Component::ParentDir => {
+                    segments.pop();
+                }
+                Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
+            }
+            segments
+        })
+}

package/src/detector/statistics.rs ADDED Viewed

@@ -0,0 +1,138 @@
+use super::model::{CloneMatch, StatisticRow, Statistics};
+/// Incremental accumulator that wraps a [`Statistics`] value.
+///
+/// Sources and clones are fed in through the methods of this type, and the
+/// accumulated statistics can be borrowed or taken out at any time.
+#[derive(Clone, Debug, Default)]
+pub struct Statistic {
+    /// Statistics accumulated so far.
+    statistics: Statistics,
+}
+impl Statistic {
+    /// Creates an accumulator with every counter at zero.
+    pub fn new() -> Self {
+        Self::default()
+    }
+    /// Borrows the statistics accumulated so far; percentages are current.
+    pub fn get_statistic(&self) -> &Statistics {
+        &self.statistics
+    }
+    /// Consumes the accumulator and returns the accumulated statistics.
+    pub fn into_statistics(self) -> Statistics {
+        self.statistics
+    }
+    /// Records one analyzed source with its line and token counts.
+    ///
+    /// The counts are added to the run total, to the total of `format_name`
+    /// and to the row of `source_id` within that format. Recording the same
+    /// source twice counts it twice in the totals.
+    pub fn match_source(
+        &mut self,
+        source_id: impl AsRef<str>,
+        format_name: impl AsRef<str>,
+        lines: usize,
+        tokens: usize,
+    ) {
+        let source_id = source_id.as_ref();
+        let format_name = format_name.as_ref();
+        update_source_statistics(&mut self.statistics, source_id, format_name, lines, tokens);
+        self.refresh_percentages(format_name, [source_id]);
+    }
+    /// Records one clone in the run total, in the total of the clone's
+    /// format and in the rows of both fragments' sources.
+    pub fn clone_found(&mut self, clone: &CloneMatch) {
+        update_clone_statistics(&mut self.statistics, clone);
+        self.refresh_percentages(
+            &clone.format,
+            [
+                clone.duplication_a.source_id.as_str(),
+                clone.duplication_b.source_id.as_str(),
+            ],
+        );
+    }
+    /// Recomputes percentages for exactly the rows an update can have
+    /// touched: the run total, the total of `format_name`, and the listed
+    /// source rows of that format.
+    ///
+    /// Every other row keeps its counters, so its stored percentages are
+    /// already correct. This avoids re-walking all formats and sources on
+    /// each recorded source or clone.
+    fn refresh_percentages<const N: usize>(&mut self, format_name: &str, source_ids: [&str; N]) {
+        update_row_percentages(&mut self.statistics.total);
+        let Some(format) = self.statistics.formats.get_mut(format_name) else {
+            return;
+        };
+        update_row_percentages(&mut format.total);
+        for source_id in source_ids {
+            if let Some(row) = format.sources.get_mut(source_id) {
+                update_row_percentages(row);
+            }
+        }
+    }
+}
+/// Number of lines spanned by the clone's first fragment, counting both the
+/// start and the end line.
+///
+/// The line difference saturates at zero, so the result is at least 1.
+pub fn clone_lines(clone: &CloneMatch) -> usize {
+    let fragment = &clone.duplication_a;
+    fragment.end.line.saturating_sub(fragment.start.line) + 1
+}
+/// Line difference between the end and the start of the clone's first
+/// fragment, saturating at zero. This is the value added to the
+/// duplicated-line counters.
+pub(super) fn clone_stat_lines(clone: &CloneMatch) -> usize {
+    let fragment = &clone.duplication_a;
+    let (first_line, last_line) = (fragment.start.line, fragment.end.line);
+    last_line.saturating_sub(first_line)
+}
+/// `position` difference between the end and the start of the clone's first
+/// fragment, saturating at zero. This is the value added to the
+/// duplicated-token counters.
+fn clone_stat_tokens(clone: &CloneMatch) -> usize {
+    let fragment = &clone.duplication_a;
+    let (first_position, last_position) = (fragment.start.position, fragment.end.position);
+    last_position.saturating_sub(first_position)
+}
+/// Adds one source with `lines` and `tokens` to the statistics.
+///
+/// The run total and the total of `format_name` each gain one source plus
+/// the given counts. The row for `source_id` inside that format gains the
+/// counts and has its `sources` field set to 1. Missing format and source
+/// rows are created. Percentages are not recomputed here.
+pub(super) fn update_source_statistics(
+    statistics: &mut Statistics,
+    source_id: &str,
+    format_name: &str,
+    lines: usize,
+    tokens: usize,
+) {
+    let format = statistics
+        .formats
+        .entry(format_name.to_string())
+        .or_default();
+    for row in [&mut statistics.total, &mut format.total] {
+        row.sources += 1;
+        row.lines += lines;
+        row.tokens += tokens;
+    }
+    let source_row = format.sources.entry(source_id.to_string()).or_default();
+    source_row.sources = 1;
+    source_row.lines += lines;
+    source_row.tokens += tokens;
+}
+/// Adds one clone to the statistics.
+///
+/// The clone's line and token amounts are taken from its first fragment.
+/// They are added, with one clone, to the run total, to the total of the
+/// clone's format, and to the row of each fragment's source. A clone whose
+/// fragments share a source therefore updates that row twice. Percentages
+/// are not recomputed here.
+pub(super) fn update_clone_statistics(statistics: &mut Statistics, clone: &CloneMatch) {
+    let (lines, tokens) = (clone_stat_lines(clone), clone_stat_tokens(clone));
+    let record = |row: &mut StatisticRow| {
+        row.clones += 1;
+        row.duplicated_lines += lines;
+        row.duplicated_tokens += tokens;
+    };
+    record(&mut statistics.total);
+    let format = statistics.formats.entry(clone.format.clone()).or_default();
+    record(&mut format.total);
+    for fragment in [&clone.duplication_a, &clone.duplication_b] {
+        record(format.sources.entry(fragment.source_id.clone()).or_default());
+    }
+}
+/// Recomputes the percentage fields of every row: the run total, each
+/// format total and each per-source row.
+pub(super) fn finalize_percentages(statistics: &mut Statistics) {
+    update_row_percentages(&mut statistics.total);
+    statistics.formats.values_mut().for_each(|format| {
+        update_row_percentages(&mut format.total);
+        format.sources.values_mut().for_each(update_row_percentages);
+    });
+}
+/// Refreshes both percentage fields of `row` from its current counters.
+fn update_row_percentages(row: &mut StatisticRow) {
+    let StatisticRow {
+        lines,
+        duplicated_lines,
+        tokens,
+        duplicated_tokens,
+        ..
+    } = *row;
+    row.percentage = percentage(lines, duplicated_lines);
+    row.percentage_tokens = percentage(tokens, duplicated_tokens);
+}
+/// Returns `duplicated` as a percentage of `total`, rounded to two decimal
+/// places, or `0.0` when `total` is zero.
+fn percentage(total: usize, duplicated: usize) -> f64 {
+    if total == 0 {
+        return 0.0;
+    }
+    // Scale to hundredths of a percent, round, then scale back.
+    let hundredths = (duplicated as f64 * 10000.0) / total as f64;
+    hundredths.round() / 100.0
+}

package/src/detector/store.rs ADDED Viewed

@@ -0,0 +1,96 @@
+use std::{collections::HashMap, error::Error, fmt};
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct MemoryStoreError {
+    namespace: String,
+    key: String,
+}
+impl MemoryStoreError {
+    pub fn new(namespace: impl Into<String>, key: impl Into<String>) -> Self {
+        Self {
+            namespace: namespace.into(),
+            key: key.into(),
+        }
+    }
+    pub fn namespace(&self) -> &str {
+        &self.namespace
+    }
+    pub fn key(&self) -> &str {
+        &self.key
+    }
+}
+impl fmt::Display for MemoryStoreError {
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            formatter,
+            "key '{}' not found in namespace '{}'",
+            self.key, self.namespace
+        )
+    }
+}
+impl Error for MemoryStoreError {}
+/// In-memory key/value store partitioned into named namespaces.
+///
+/// Reads and writes always go to the current namespace.
+#[derive(Clone, Debug)]
+pub struct MemoryStore<T> {
+    /// Name of the current namespace.
+    namespace: String,
+    /// Stored values: namespace name, then key, then value.
+    values: HashMap<String, HashMap<String, T>>,
+}
+// Implemented by hand so that `T` does not need to be `Default`.
+impl<T> Default for MemoryStore<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+impl<T> MemoryStore<T> {
+    /// Creates an empty store whose current namespace is the empty string.
+    pub fn new() -> Self {
+        let namespace = String::new();
+        let values = HashMap::new();
+        Self { namespace, values }
+    }
+    /// Makes `namespace` the current namespace, creating an empty bucket for
+    /// it when none exists yet.
+    pub fn namespace(&mut self, namespace: impl Into<String>) {
+        let selected: String = namespace.into();
+        self.values.entry(selected.clone()).or_default();
+        self.namespace = selected;
+    }
+    /// Name of the current namespace.
+    pub fn current_namespace(&self) -> &str {
+        self.namespace.as_str()
+    }
+    /// Looks up `key` in the current namespace.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`MemoryStoreError`] naming the namespace and key when the
+    /// namespace has no bucket or the bucket has no such key.
+    pub fn get(&self, key: impl AsRef<str>) -> Result<&T, MemoryStoreError> {
+        let key = key.as_ref();
+        let found = self
+            .values
+            .get(&self.namespace)
+            .and_then(|bucket| bucket.get(key));
+        match found {
+            Some(value) => Ok(value),
+            None => Err(MemoryStoreError::new(self.namespace.clone(), key)),
+        }
+    }
+    /// Stores `value` under `key` in the current namespace, replacing any
+    /// previous value, and returns a reference to the stored value.
+    pub fn set(&mut self, key: impl Into<String>, value: T) -> &T {
+        use std::collections::hash_map::Entry;
+        let key = key.into();
+        let bucket = self.values.entry(self.namespace.clone()).or_default();
+        let stored: &mut T = match bucket.entry(key) {
+            Entry::Occupied(mut slot) => {
+                slot.insert(value);
+                slot.into_mut()
+            }
+            Entry::Vacant(slot) => slot.insert(value),
+        };
+        stored
+    }
+    /// Removes every namespace bucket and value; the current namespace name
+    /// is kept.
+    pub fn close(&mut self) {
+        self.values.clear();
+    }
+    /// Returns `true` when no namespace holds any value.
+    pub fn is_empty(&self) -> bool {
+        !self.values.values().any(|bucket| !bucket.is_empty())
+    }
+    /// Total number of values across all namespaces.
+    pub fn len(&self) -> usize {
+        self.values
+            .values()
+            .fold(0, |count, bucket| count + bucket.len())
+    }
+}