npm - @dependabit/detector - Versions diffs - 0.1.1 - Mend

@dependabit/detector 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

package/CHANGELOG.md +10 -0
package/LICENSE +21 -0
package/README.md +32 -0
package/dist/detector.d.ts +64 -0
package/dist/detector.d.ts.map +1 -0
package/dist/detector.js +578 -0
package/dist/detector.js.map +1 -0
package/dist/diff-parser.d.ts +53 -0
package/dist/diff-parser.d.ts.map +1 -0
package/dist/diff-parser.js +203 -0
package/dist/diff-parser.js.map +1 -0
package/dist/index.d.ts +14 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +9 -0
package/dist/index.js.map +1 -0
package/dist/llm/client.d.ts +65 -0
package/dist/llm/client.d.ts.map +1 -0
package/dist/llm/client.js +12 -0
package/dist/llm/client.js.map +1 -0
package/dist/llm/copilot.d.ts +15 -0
package/dist/llm/copilot.d.ts.map +1 -0
package/dist/llm/copilot.js +119 -0
package/dist/llm/copilot.js.map +1 -0
package/dist/llm/prompts.d.ts +10 -0
package/dist/llm/prompts.d.ts.map +1 -0
package/dist/llm/prompts.js +94 -0
package/dist/llm/prompts.js.map +1 -0
package/dist/parsers/code-comments.d.ts +23 -0
package/dist/parsers/code-comments.d.ts.map +1 -0
package/dist/parsers/code-comments.js +139 -0
package/dist/parsers/code-comments.js.map +1 -0
package/dist/parsers/package-files.d.ts +31 -0
package/dist/parsers/package-files.d.ts.map +1 -0
package/dist/parsers/package-files.js +130 -0
package/dist/parsers/package-files.js.map +1 -0
package/dist/parsers/readme.d.ts +23 -0
package/dist/parsers/readme.d.ts.map +1 -0
package/dist/parsers/readme.js +151 -0
package/dist/parsers/readme.js.map +1 -0
package/package.json +41 -0
package/src/detector.ts +746 -0
package/src/diff-parser.ts +257 -0
package/src/index.ts +43 -0
package/src/llm/client.ts +85 -0
package/src/llm/copilot.ts +147 -0
package/src/llm/prompts.ts +102 -0
package/src/parsers/code-comments.ts +178 -0
package/src/parsers/package-files.ts +156 -0
package/src/parsers/readme.ts +185 -0
package/test/detector.test.ts +102 -0
package/test/diff-parser.test.ts +187 -0
package/test/llm/client.test.ts +31 -0
package/test/llm/copilot.test.ts +55 -0
package/test/parsers/code-comments.test.ts +98 -0
package/test/parsers/package-files.test.ts +52 -0
package/test/parsers/readme.test.ts +52 -0
package/tsconfig.json +10 -0
package/tsconfig.tsbuildinfo +1 -0

package/src/diff-parser.ts ADDED Viewed

@@ -0,0 +1,257 @@
+/**
+ * Diff Parser
+ * Parse git diffs to extract meaningful changes for dependency analysis
+ */
+import type { CommitFile } from '@dependabit/github-client';
+/**
+ * Lines extracted from a unified diff by `parseDiff`.
+ */
+export interface DiffParseResult {
+  /** Added lines, with the leading `+` marker removed. */
+  additions: string[];
+  /** Removed lines, with the leading `-` marker removed. */
+  deletions: string[];
+}
+/**
+ * Dependency-related content pulled out of a set of diff lines.
+ * Both arrays are de-duplicated by the extractor functions in this module.
+ */
+export interface ExtractedContent {
+  /** HTTP(S) URLs found in the lines. */
+  urls: string[];
+  /** Dependency names captured from manifest lines (only filled for recognised manifest filenames). */
+  packageDeps: string[];
+}
+/**
+ * Classification of a commit's files produced by `getChangedFiles`.
+ * All paths keep the casing they had in the commit.
+ */
+export interface ChangedFilesResult {
+  /** Every file considered worth analysing; includes all package and documentation files. */
+  relevantFiles: string[];
+  /** Files whose basename is a known package manifest. */
+  packageFiles: string[];
+  /** Files classified as documentation. */
+  documentationFiles: string[];
+}
+// URL pattern to match HTTP(S) URLs (global, case-insensitive).
+// A match runs until whitespace or one of < > " { } | \ ^ ` [ ].
+// Note: ')' '.' and ',' are NOT excluded, so a URL at the end of a sentence or
+// inside a Markdown link is matched together with that trailing punctuation.
+const URL_PATTERN = /https?:\/\/[^\s<>"{}|\\^`[\]]+/gi;
+// Package dependency patterns, one per manifest format.
+// Capture group 1 is always the dependency name.
+const PACKAGE_DEP_PATTERNS = {
+  // `"name": "<version>"` where the version is an optional ^ or ~ followed only by
+  // digits and dots. Values such as "1.0.0-beta", "*" or "workspace:*" do not match.
+  packageJson: /"([^"]+)":\s*"[\^~]?[\d.]+"/g,
+  // `name<op>...` at the start of a line, where <op> is one of >= == <= ~= !=.
+  // A bare `name`, or a single-character operator such as `name>1`, does not match.
+  requirementsTxt: /^([a-zA-Z0-9_-]+)[>=<~!]=.*/gm,
+  // Any `key = ...` line whose key is not one of the listed package-metadata keys.
+  // The pattern has no notion of TOML sections, so keys from other tables that are
+  // not in the exclusion list (for example `opt-level = 3`) are captured as well.
+  cargoToml:
+    /^(?!\s*(?:name|version|authors|edition|description|license|workspace|build|default-run|repository|homepage|documentation|readme|keywords|categories|exclude|include|publish|resolver)\s*=)\s*([a-zA-Z0-9_-]+)\s*=.*/gm
+};
+// File extensions relevant for dependency analysis.
+// getChangedFiles tests these with endsWith() against the lower-cased path, so every
+// entry must be lower-case and include the leading dot.
+const RELEVANT_EXTENSIONS = [
+  '.md',
+  '.txt',
+  '.rst',
+  '.adoc', // Documentation
+  '.ts',
+  '.js',
+  '.py',
+  '.rs',
+  '.go',
+  '.java',
+  '.cpp',
+  '.c',
+  '.h', // Code
+  '.json',
+  '.toml',
+  '.yaml',
+  '.yml', // Config
+  '.html',
+  '.xml' // Markup
+];
+// Package manifest files.
+// getChangedFiles compares these case-insensitively against a file's basename.
+// Only package.json, requirements.txt and Cargo.toml have a matching entry in
+// PACKAGE_DEP_PATTERNS; the other manifests are classified as package files but
+// no dependency names are extracted from them.
+const PACKAGE_MANIFEST_FILES = [
+  'package.json',
+  'requirements.txt',
+  'Cargo.toml',
+  'go.mod',
+  'pom.xml',
+  'build.gradle',
+  'Gemfile',
+  'composer.json'
+];
+/**
+ * Parse a unified diff and extract additions and deletions.
+ *
+ * Hunk extents are taken from the `@@ -a,b +c,d @@` headers. Inside a hunk every
+ * line starting with `+` or `-` is content, even when the content itself begins
+ * with `++` or `--` (an added `++i;` appears as `+++i;`, a removed `--flag` as
+ * `---flag`). Outside a hunk, or for input that has no hunk headers at all, lines
+ * starting with `+++` / `---` are treated as file headers and skipped.
+ *
+ * @param patch - Unified diff text; an empty string yields empty results.
+ * @returns Added and removed lines with their one-character marker stripped.
+ */
+export function parseDiff(patch: string): DiffParseResult {
+  const additions: string[] = [];
+  const deletions: string[] = [];
+  if (!patch) {
+    return { additions, deletions };
+  }
+  // A missing count in a hunk header means exactly one line.
+  const hunkHeader = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/;
+  let oldRemaining = 0;
+  let newRemaining = 0;
+  for (const line of patch.split('\n')) {
+    const header = hunkHeader.exec(line);
+    if (header) {
+      oldRemaining = header[1] === undefined ? 1 : Number(header[1]);
+      newRemaining = header[2] === undefined ? 1 : Number(header[2]);
+      continue;
+    }
+    if (oldRemaining > 0 || newRemaining > 0) {
+      const marker = line.charAt(0);
+      if (marker === '+') {
+        additions.push(line.substring(1));
+        newRemaining--;
+      } else if (marker === '-') {
+        deletions.push(line.substring(1));
+        oldRemaining--;
+      } else if (marker === ' ' || line === '') {
+        // Context line (some tools strip the lone space from blank context lines)
+        oldRemaining--;
+        newRemaining--;
+      } else if (marker !== '\\') {
+        // Not hunk content (e.g. "diff --git ..."): the header counts were too
+        // large, so close the hunk. "\ No newline at end of file" is just skipped.
+        oldRemaining = 0;
+        newRemaining = 0;
+      }
+      continue;
+    }
+    // Outside any hunk: keep the header heuristic so bare "+line" / "-line" input works
+    if (line.startsWith('+') && !line.startsWith('+++')) {
+      additions.push(line.substring(1));
+    } else if (line.startsWith('-') && !line.startsWith('---')) {
+      deletions.push(line.substring(1));
+    }
+  }
+  return { additions, deletions };
+}
+/**
+ * Remove punctuation that URL_PATTERN picks up from surrounding prose or Markdown:
+ * trailing . , ; : ! ? ' * and any closing parenthesis that has no matching opener
+ * inside the URL (so ".../Foo_(bar)" is kept intact).
+ */
+function trimUrlTail(url: string): string {
+  let end = url.length;
+  while (end > 0) {
+    const last = url.charAt(end - 1);
+    if (".,;:!?'*".includes(last)) {
+      end--;
+      continue;
+    }
+    if (last === ')') {
+      const head = url.slice(0, end);
+      const opens = head.split('(').length - 1;
+      const closes = head.split(')').length - 1;
+      if (closes > opens) {
+        end--;
+        continue;
+      }
+    }
+    break;
+  }
+  return url.slice(0, end);
+}
+/**
+ * Extract meaningful content from added lines.
+ *
+ * @param additions - Diff lines with the `+` marker already removed.
+ * @param filename - Name or path of the file the lines came from. Only its basename
+ *   is used (case-insensitively) to pick a manifest pattern; when omitted, or when
+ *   the basename is not package.json / requirements.txt / Cargo.toml, no package
+ *   names are extracted.
+ * @returns Unique URLs and unique dependency names, in first-seen order.
+ */
+export function extractAddedContent(additions: string[], filename?: string): ExtractedContent {
+  const urls = new Set<string>();
+  const packageDeps = new Set<string>();
+  const content = additions.join('\n');
+  // Extract URLs, dropping sentence/Markdown punctuation glued to the end
+  for (const match of content.matchAll(URL_PATTERN)) {
+    const url = trimUrlTail(match[0]);
+    // Skip matches that were nothing but a scheme plus punctuation
+    if (!/^https?:\/\/$/i.test(url)) {
+      urls.add(url);
+    }
+  }
+  // Extract package dependencies based on file type
+  if (filename) {
+    // Accept a full path as well as a bare basename
+    const basename = (filename.split(/[\\/]/).pop() || '').toLowerCase();
+    let pattern: RegExp | undefined;
+    if (basename === 'package.json') {
+      pattern = PACKAGE_DEP_PATTERNS.packageJson;
+    } else if (basename === 'requirements.txt') {
+      pattern = PACKAGE_DEP_PATTERNS.requirementsTxt;
+    } else if (basename === 'cargo.toml') {
+      pattern = PACKAGE_DEP_PATTERNS.cargoToml;
+    }
+    if (pattern) {
+      for (const match of content.matchAll(pattern)) {
+        if (match[1]) packageDeps.add(match[1]);
+      }
+    }
+  }
+  return {
+    urls: Array.from(urls),
+    packageDeps: Array.from(packageDeps)
+  };
+}
+/**
+ * Extract meaningful content from removed lines.
+ *
+ * Thin alias of `extractAddedContent`: removed lines are scanned with exactly the
+ * same URL and manifest patterns, so results for both sides of a diff are comparable.
+ *
+ * @param deletions - Diff lines with the `-` marker already removed.
+ * @param filename - Forwarded unchanged to `extractAddedContent`.
+ * @returns Whatever `extractAddedContent` returns for the same arguments.
+ */
+export function extractRemovedContent(deletions: string[], filename?: string): ExtractedContent {
+  // Use the same logic as extractAddedContent
+  return extractAddedContent(deletions, filename);
+}
+/**
+ * Identify files relevant for dependency analysis.
+ *
+ * Each file is classified by the first rule that matches:
+ *  1. package manifest - basename equals an entry of PACKAGE_MANIFEST_FILES;
+ *  2. documentation - basename starts with "readme", or any directory in the path
+ *     is named "docs" or "documentation" (including a top-level one);
+ *  3. otherwise relevant only if the path ends with one of RELEVANT_EXTENSIONS.
+ *
+ * Matching is case-insensitive, but the returned paths keep their original case
+ * from the commit for consistency with file system operations.
+ *
+ * @param files - Files of a commit; only `filename` is read.
+ * @returns De-duplicated path lists in input order.
+ */
+export function getChangedFiles(files: CommitFile[]): ChangedFilesResult {
+  const manifestNames = new Set(PACKAGE_MANIFEST_FILES.map((name) => name.toLowerCase()));
+  const relevantFiles = new Set<string>();
+  const packageFiles = new Set<string>();
+  const documentationFiles = new Set<string>();
+  for (const file of files) {
+    const lowerPath = file.filename.toLowerCase();
+    const directories = lowerPath.split('/');
+    // pop() removes the basename and leaves only the directory segments
+    const basename = directories.pop() || '';
+    if (manifestNames.has(basename)) {
+      packageFiles.add(file.filename);
+      relevantFiles.add(file.filename);
+      continue;
+    }
+    // Segment comparison also catches "docs/guide.md", which a '/docs/' substring test misses
+    const inDocsDirectory = directories.some(
+      (segment) => segment === 'docs' || segment === 'documentation'
+    );
+    if (basename.startsWith('readme') || inDocsDirectory) {
+      documentationFiles.add(file.filename);
+      relevantFiles.add(file.filename);
+      continue;
+    }
+    if (RELEVANT_EXTENSIONS.some((ext) => lowerPath.endsWith(ext))) {
+      relevantFiles.add(file.filename);
+    }
+  }
+  return {
+    relevantFiles: Array.from(relevantFiles),
+    packageFiles: Array.from(packageFiles),
+    documentationFiles: Array.from(documentationFiles)
+  };
+}
+/**
+ * Build a lookup from filename to parsed diff for every commit file that has a patch.
+ *
+ * Files without a patch (missing or empty string) are left out of the map.
+ *
+ * @param files - Files of a commit.
+ * @returns Map keyed by the file's `filename`, in input order.
+ */
+export function parseCommitDiffs(files: CommitFile[]): Map<string, DiffParseResult> {
+  return files.reduce((parsedByFile, entry) => {
+    if (entry.patch) {
+      parsedByFile.set(entry.filename, parseDiff(entry.patch));
+    }
+    return parsedByFile;
+  }, new Map<string, DiffParseResult>());
+}
+/**
+ * Extract all dependency-related content from commit diffs.
+ *
+ * Only files that have a patch and that `getChangedFiles` marks as relevant are
+ * scanned. URLs and package names are collected separately for added and removed
+ * lines and de-duplicated across files, in first-seen order.
+ *
+ * @param files - Files of a commit.
+ * @returns Added/removed URLs and package names plus the file classification.
+ */
+export function extractDependencyChanges(files: CommitFile[]): {
+  addedUrls: string[];
+  removedUrls: string[];
+  addedPackages: string[];
+  removedPackages: string[];
+  changedFiles: ChangedFilesResult;
+} {
+  const changedFiles = getChangedFiles(files);
+  // Set lookup keeps the per-file relevance check O(1) instead of scanning the array
+  const relevant = new Set(changedFiles.relevantFiles);
+  // Accumulate straight into Sets: dedupes as we go and avoids push(...spread),
+  // which can exceed the engine's argument limit on very large diffs
+  const addedUrls = new Set<string>();
+  const removedUrls = new Set<string>();
+  const addedPackages = new Set<string>();
+  const removedPackages = new Set<string>();
+  for (const file of files) {
+    if (!file.patch || !relevant.has(file.filename)) {
+      continue;
+    }
+    const diff = parseDiff(file.patch);
+    const basename = file.filename.split('/').pop();
+    const addedContent = extractAddedContent(diff.additions, basename);
+    for (const url of addedContent.urls) addedUrls.add(url);
+    for (const dep of addedContent.packageDeps) addedPackages.add(dep);
+    const removedContent = extractRemovedContent(diff.deletions, basename);
+    for (const url of removedContent.urls) removedUrls.add(url);
+    for (const dep of removedContent.packageDeps) removedPackages.add(dep);
+  }
+  return {
+    addedUrls: Array.from(addedUrls),
+    removedUrls: Array.from(removedUrls),
+    addedPackages: Array.from(addedPackages),
+    removedPackages: Array.from(removedPackages),
+    changedFiles
+  };
+}

package/src/index.ts ADDED Viewed

@@ -0,0 +1,43 @@
+// Entry point for @dependabit/detector
+// LLM Provider
+export type {
+  LLMProvider,
+  LLMProviderConfig,
+  LLMResponse,
+  RateLimitInfo,
+  DetectedDependency,
+  LLMUsageMetadata
+} from './llm/client.js';
+export { GitHubCopilotProvider } from './llm/copilot.js';
+export { SYSTEM_PROMPT, createDetectionPrompt, createClassificationPrompt } from './llm/prompts.js';
+// Parsers
+export type { ExtractedReference } from './parsers/readme.js';
+export { parseReadme, extractGitHubReferences } from './parsers/readme.js';
+export type { CommentReference } from './parsers/code-comments.js';
+export { parseCodeComments, extractSpecReferences } from './parsers/code-comments.js';
+export type { PackageMetadata } from './parsers/package-files.js';
+export {
+  parsePackageJson,
+  parseRequirementsTxt,
+  parseCargoToml,
+  parseGoMod
+} from './parsers/package-files.js';
+// Diff Parser
+export type { DiffParseResult, ExtractedContent, ChangedFilesResult } from './diff-parser.js';
+export {
+  parseDiff,
+  extractAddedContent,
+  extractRemovedContent,
+  getChangedFiles,
+  parseCommitDiffs,
+  extractDependencyChanges
+} from './diff-parser.js';
+// Detector
+export type { DetectorOptions, DetectionResult } from './detector.js';
+export { Detector } from './detector.js';

package/src/llm/client.ts ADDED Viewed

@@ -0,0 +1,85 @@
+/**
+ * LLM Provider Interface
+ * Abstraction layer for different LLM providers (GitHub Copilot, Claude, OpenAI, etc.)
+ */
+import type { z } from 'zod';
+/**
+ * Snapshot of a provider's rate-limit state.
+ * A provider that cannot determine its limits may return sentinel values
+ * (GitHubCopilotProvider uses -1 for both counts and the epoch for `resetAt`).
+ */
+export interface RateLimitInfo {
+  /** Requests still available in the current window. */
+  remaining: number;
+  /** Total requests allowed per window. */
+  limit: number;
+  /** When the current window resets. */
+  resetAt: Date;
+}
+/**
+ * Usage figures attached to every LLM response.
+ * Token counts may be estimates when the backend does not report them.
+ */
+export interface LLMUsageMetadata {
+  /** Tokens consumed by the prompt. */
+  promptTokens: number;
+  /** Tokens produced in the completion. */
+  completionTokens: number;
+  /** Sum of prompt and completion tokens. */
+  totalTokens: number;
+  /** Identifier of the model that served the request. */
+  model: string;
+  /** Elapsed time of the call in milliseconds. */
+  latencyMs: number;
+}
+/**
+ * One external dependency reported by an LLM provider.
+ */
+export interface DetectedDependency {
+  /** URL of the external resource. */
+  url: string;
+  /** Human-readable name of the resource. */
+  name: string;
+  /** What the resource is used for, when the model supplied it. */
+  description?: string;
+  /** Category of the resource; 'other' is the catch-all. */
+  type:
+    | 'reference-implementation'
+    | 'schema'
+    | 'documentation'
+    | 'research-paper'
+    | 'api-example'
+    | 'other';
+  confidence: number; // 0.0 - 1.0
+  reasoning?: string; // Why this was detected as a dependency
+}
+/**
+ * Result of a single `LLMProvider.analyze` call.
+ */
+export interface LLMResponse {
+  /** Dependencies the model reported; may be empty. */
+  dependencies: DetectedDependency[];
+  /** Token and latency figures for the call. */
+  usage: LLMUsageMetadata;
+  rawResponse?: string; // For debugging
+}
+/**
+ * Optional settings accepted by LLM providers.
+ * Every field may be omitted; each provider supplies its own defaults.
+ */
+export interface LLMProviderConfig {
+  /** Credential used to authenticate with the provider. */
+  apiKey?: string;
+  /** Base URL of the provider's API, for providers that use one. */
+  endpoint?: string;
+  /** Model identifier to request. */
+  model?: string;
+  /** Upper bound on tokens to generate. */
+  maxTokens?: number;
+  /** Sampling temperature. */
+  temperature?: number;
+}
+/**
+ * Base interface that all LLM providers must implement.
+ * The rest of the package talks to a provider only through these four methods.
+ */
+export interface LLMProvider {
+  /**
+   * Analyze content and detect external dependencies
+   * @param content - Text content to analyze (README, code, etc.)
+   * @param prompt - Detection prompt template
+   * @returns LLM response with detected dependencies
+   */
+  analyze(content: string, prompt: string): Promise<LLMResponse>;
+  /**
+   * Get list of supported models for this provider
+   * @returns Model identifiers the provider accepts
+   */
+  getSupportedModels(): string[];
+  /**
+   * Get current rate limit status
+   * @returns Rate-limit snapshot; providers may use sentinel values when it is unknown
+   */
+  getRateLimit(): Promise<RateLimitInfo>;
+  /**
+   * Validate provider configuration
+   * @returns true when the provider considers its configuration usable
+   */
+  validateConfig(): boolean;
+}
+/**
+ * Factory placeholder for LLM providers.
+ *
+ * No provider is wired up here yet, so every call throws; `config` is accepted
+ * for signature stability but is not read.
+ *
+ * @param providerName - Name of the requested provider; echoed in the error message.
+ * @param config - Provider settings (currently unused).
+ * @throws Error always, with the message "Provider <name> not yet implemented".
+ */
+export function createLLMProvider(providerName: string, config: LLMProviderConfig): LLMProvider {
+  // Implementation will be in specific provider files
+  const reason = `Provider ${providerName} not yet implemented`;
+  throw new Error(reason);
+}

package/src/llm/copilot.ts ADDED Viewed

@@ -0,0 +1,147 @@
+/**
+ * GitHub Copilot CLI Provider Implementation
+ * Integrates with GitHub Copilot via CLI commands
+ */
+import { exec } from 'node:child_process';
+import { promisify } from 'node:util';
+import type {
+  LLMProvider,
+  LLMProviderConfig,
+  LLMResponse,
+  RateLimitInfo,
+  DetectedDependency,
+  LLMUsageMetadata
+} from './client.js';
+import { SYSTEM_PROMPT } from './prompts.js';
+const execAsync = promisify(exec);
+/**
+ * Fixed command line used to query Copilot. It contains no caller-supplied text:
+ * the prompt is delivered on stdin, so nothing untrusted is ever parsed by the shell.
+ * `2>&1` folds stderr into stdout, as the CLI may report problems on either stream.
+ */
+const COPILOT_COMMAND = 'gh copilot suggest --yes 2>&1';
+/**
+ * Return the payload of the first Markdown code fence in `output` (preferring a
+ * ```json fence), or the trimmed output itself when there is no usable fence.
+ */
+function unwrapCodeFence(output: string): string {
+  let text = output.trim();
+  if (text.includes('```json')) {
+    const jsonMatch = text.match(/```json\s*([\s\S]*?)```/);
+    if (jsonMatch && jsonMatch[1]) {
+      text = jsonMatch[1].trim();
+    }
+  } else if (text.includes('```')) {
+    const codeMatch = text.match(/```\s*([\s\S]*?)```/);
+    if (codeMatch && codeMatch[1]) {
+      text = codeMatch[1].trim();
+    }
+  }
+  return text;
+}
+/**
+ * Parse the model's JSON answer into a dependency list.
+ * Anything that is not `{ "dependencies": [...] }` yields an empty list, and array
+ * entries that are not objects with a string `url` are dropped, so callers never
+ * receive values that violate the DetectedDependency shape at its most basic level.
+ */
+function parseDependencies(text: string): DetectedDependency[] {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(text);
+  } catch (parseError) {
+    console.error('Failed to parse Copilot CLI response:', text, parseError);
+    return [];
+  }
+  if (typeof parsed !== 'object' || parsed === null) {
+    return [];
+  }
+  const candidates = (parsed as { dependencies?: unknown }).dependencies;
+  if (!Array.isArray(candidates)) {
+    return [];
+  }
+  return candidates.filter(
+    (entry): entry is DetectedDependency =>
+      typeof entry === 'object' &&
+      entry !== null &&
+      typeof (entry as { url?: unknown }).url === 'string'
+  );
+}
+/**
+ * LLM provider backed by the GitHub CLI's Copilot extension (`gh copilot`).
+ *
+ * Failures never propagate out of `analyze`: if the CLI is missing, exits with an
+ * error, times out or returns unparsable output, the result has an empty
+ * dependency list.
+ */
+export class GitHubCopilotProvider implements LLMProvider {
+  private config: Required<LLMProviderConfig>;
+  private model: string;
+  /**
+   * @param config - Optional overrides. Defaults: apiKey from the GITHUB_TOKEN
+   *   environment variable (else ''), endpoint '', model 'gpt-4', maxTokens 4000,
+   *   temperature 0.3.
+   */
+  constructor(config: LLMProviderConfig = {}) {
+    // Default configuration for CLI-based approach
+    this.config = {
+      apiKey: config.apiKey || process.env['GITHUB_TOKEN'] || '',
+      endpoint: config.endpoint || '',
+      model: config.model || 'gpt-4',
+      maxTokens: config.maxTokens || 4000,
+      // ?? rather than ||: an explicit temperature of 0 is valid and must be kept
+      temperature: config.temperature ?? 0.3
+    };
+    this.model = this.config.model;
+    // GitHub Copilot CLI uses GitHub authentication, not a separate API key
+    // The GITHUB_TOKEN is used for authentication with GitHub, not OpenAI
+  }
+  /**
+   * Send SYSTEM_PROMPT plus `prompt` to the Copilot CLI and parse its answer.
+   *
+   * @param content - Not read by this provider; presumably the caller has already
+   *   embedded the content in `prompt` - TODO confirm against callers.
+   * @param prompt - User prompt appended to the system prompt.
+   * @returns Detected dependencies with estimated token usage. On any failure the
+   *   list is empty and `rawResponse` carries the error message.
+   */
+  async analyze(content: string, prompt: string): Promise<LLMResponse> {
+    const startTime = Date.now();
+    try {
+      // Combine system prompt and user prompt for CLI
+      const fullPrompt = `${SYSTEM_PROMPT}\n\n${prompt}`;
+      // Run the fixed command and feed the prompt through stdin. Repository text
+      // (backticks, backslashes, quotes, $(...)) therefore cannot inject commands.
+      const pending = execAsync(COPILOT_COMMAND, {
+        maxBuffer: 10 * 1024 * 1024, // 10MB buffer for large responses
+        timeout: 60000 // 60 second timeout
+      });
+      const stdin = pending.child.stdin;
+      if (stdin) {
+        // If the process dies before reading its input the write fails (EPIPE);
+        // the rejection of `pending` already reports that, so ignore it here.
+        stdin.on('error', () => undefined);
+        stdin.end(`${fullPrompt}\n`);
+      }
+      const { stdout, stderr } = await pending;
+      const latencyMs = Date.now() - startTime;
+      if (stderr && !stdout) {
+        throw new Error(`Copilot CLI error: ${stderr}`);
+      }
+      // Copilot CLI may return the JSON directly or wrapped in markdown
+      const content_text = unwrapCodeFence(stdout);
+      const dependencies = parseDependencies(content_text);
+      // Estimate token usage (rough approximation since CLI doesn't provide this):
+      // about four characters per token
+      const promptTokens = Math.ceil(fullPrompt.length / 4);
+      const completionTokens = Math.ceil(content_text.length / 4);
+      const usage: LLMUsageMetadata = {
+        promptTokens,
+        completionTokens,
+        totalTokens: promptTokens + completionTokens,
+        model: this.model,
+        latencyMs
+      };
+      return {
+        dependencies,
+        usage,
+        rawResponse: content_text
+      };
+    } catch (error) {
+      const latencyMs = Date.now() - startTime;
+      console.error('Copilot CLI analysis failed:', error);
+      // Return empty result on error
+      return {
+        dependencies: [],
+        usage: {
+          promptTokens: 0,
+          completionTokens: 0,
+          totalTokens: 0,
+          model: this.model,
+          latencyMs
+        },
+        rawResponse: error instanceof Error ? error.message : 'Unknown error'
+      };
+    }
+  }
+  /** @returns The fixed list of model names this provider advertises. */
+  getSupportedModels(): string[] {
+    // Copilot CLI uses GitHub's models, not directly specified
+    return ['github-copilot', 'gpt-4', 'gpt-4-turbo'];
+  }
+  /** @returns Sentinel values (-1 / epoch), because the CLI exposes no rate-limit data. */
+  async getRateLimit(): Promise<RateLimitInfo> {
+    // Copilot CLI doesn't expose rate limits directly
+    // Rate limiting is handled by GitHub's infrastructure
+    return {
+      remaining: -1, // Unknown
+      limit: -1, // Unknown
+      resetAt: new Date(0) // Unknown
+    };
+  }
+  /** @returns Always true; this does not check that `gh` is installed or authenticated. */
+  validateConfig(): boolean {
+    // For CLI approach, we just need gh CLI to be available
+    // Authentication is handled by GitHub CLI itself
+    return true;
+  }
+}

package/src/llm/prompts.ts ADDED Viewed

@@ -0,0 +1,102 @@
+/**
+ * Detection prompts for LLM-based dependency analysis
+ * Optimized for identifying external informational dependencies
+ */
+/**
+ * System prompt shared by all detection requests.
+ *
+ * It tells the model what counts as an informational dependency, what to leave
+ * out, and the exact JSON shape to answer with. The field names and the `type`
+ * values listed here are the ones the response parser and the
+ * `DetectedDependency` interface rely on, so keep them in sync when editing.
+ */
+export const SYSTEM_PROMPT = `You are an expert at analyzing code repositories to identify external informational dependencies.
+Your task is to identify external resources that developers reference but are NOT tracked by package managers (npm, PyPI, Cargo, etc.).
+INCLUDE these types of dependencies:
+- GitHub repositories referenced but not declared in package files
+- Documentation sites and API references
+- OpenAPI/GraphQL schemas
+- Research papers and arXiv preprints
+- Reference implementations and code examples
+- Technical specifications and RFCs
+EXCLUDE these (handled by dependabot):
+- NPM packages in package.json
+- Python packages in requirements.txt
+- Rust crates in Cargo.toml
+- Docker images in Dockerfile
+- Any declared package manager dependencies
+For each dependency found, provide:
+1. url: The complete URL
+2. name: A descriptive name
+3. description: What this dependency is used for
+4. type: One of [reference-implementation, schema, documentation, research-paper, api-example, other]
+5. confidence: A score from 0.0 to 1.0 indicating detection confidence
+6. reasoning: Brief explanation of why this is a dependency
+Return ONLY valid JSON in this format:
+{
+  "dependencies": [
+    {
+      "url": "https://example.com/resource",
+      "name": "Resource Name",
+      "description": "Purpose in the project",
+      "type": "documentation",
+      "confidence": 0.95,
+      "reasoning": "Referenced in README as API documentation"
+    }
+  ]
+}`;
+/**
+ * Template for a detection request. The placeholders `{contentType}`, `{filePath}`
+ * and `{content}` are filled in by `createDetectionPrompt`.
+ */
+export const DETECTION_PROMPT_TEMPLATE = `Analyze the following content from a code repository and identify external informational dependencies:
+## Content Type: {contentType}
+## File Path: {filePath}
+## Content:
+{content}
+Remember:
+- Focus on external resources NOT in package managers
+- Provide confidence scores based on clarity of references
+- Include context about how each dependency is used
+- Return valid JSON only
+Analyze and respond:`;
+/**
+ * Fill DETECTION_PROMPT_TEMPLATE with the given values.
+ *
+ * Values are inserted verbatim in a single pass over the template:
+ *  - `$&`, `$$`, `$1` and similar sequences in a value are NOT expanded (they
+ *    would be if the value were passed to `String.replace` as a replacement string);
+ *  - placeholder-looking text inside one value (for example a file whose content
+ *    contains `{content}`) is never substituted by another value.
+ *
+ * @param contentType - Kind of content being analysed (e.g. "readme").
+ * @param filePath - Path of the file the content came from.
+ * @param content - The text to analyse.
+ * @returns The completed prompt.
+ */
+export function createDetectionPrompt(
+  contentType: string,
+  filePath: string,
+  content: string
+): string {
+  const values: Record<string, string> = { contentType, filePath, content };
+  return DETECTION_PROMPT_TEMPLATE.replace(
+    /\{(contentType|filePath|content)\}/g,
+    // A function replacer bypasses the special "$" replacement patterns
+    (placeholder, key: string) => values[key] ?? placeholder
+  );
+}
+/**
+ * Template for a URL classification request. The placeholders `{url}` and
+ * `{context}` are filled in by `createClassificationPrompt`.
+ */
+export const CLASSIFICATION_PROMPT_TEMPLATE = `Given this URL, classify its dependency type and suggest the best access method:
+URL: {url}
+Context: {context}
+Classify as one of:
+- reference-implementation: Example code demonstrating usage
+- schema: OpenAPI, JSON Schema, GraphQL, Protocol Buffers
+- documentation: API docs, tutorials, guides
+- research-paper: Academic papers, arXiv preprints
+- api-example: Code snippets from documentation
+- other: If none of the above fit
+Also determine the best access method:
+- github-api: For GitHub repositories
+- arxiv: For arXiv papers
+- openapi: For OpenAPI specifications
+- http: For generic web content
+Return JSON:
+{
+  "type": "documentation",
+  "accessMethod": "http",
+  "confidence": 0.9,
+  "reasoning": "URL structure suggests API documentation"
+}`;
+/**
+ * Fill CLASSIFICATION_PROMPT_TEMPLATE with the given values.
+ *
+ * Values are inserted verbatim in a single pass: `$`-sequences such as `$&` in a
+ * URL's query string are not expanded, and a `{context}` / `{url}` occurring inside
+ * one value is never replaced by the other value.
+ *
+ * @param url - The URL to classify.
+ * @param context - Surrounding text that shows how the URL is used.
+ * @returns The completed prompt.
+ */
+export function createClassificationPrompt(url: string, context: string): string {
+  const values: Record<string, string> = { url, context };
+  return CLASSIFICATION_PROMPT_TEMPLATE.replace(
+    /\{(url|context)\}/g,
+    // A function replacer bypasses the special "$" replacement patterns
+    (placeholder, key: string) => values[key] ?? placeholder
+  );
+}