corp_extractor-0.2.5-py3-none-any.whl → corp_extractor-0.2.11-py3-none-any.whl
- {corp_extractor-0.2.5.dist-info → corp_extractor-0.2.11.dist-info}/METADATA +19 -11
- corp_extractor-0.2.11.dist-info/RECORD +11 -0
- statement_extractor/cli.py +31 -1
- statement_extractor/extractor.py +77 -5
- statement_extractor/models.py +6 -0
- statement_extractor/predicate_comparer.py +23 -1
- statement_extractor/scoring.py +32 -10
- corp_extractor-0.2.5.dist-info/RECORD +0 -11
- {corp_extractor-0.2.5.dist-info → corp_extractor-0.2.11.dist-info}/WHEEL +0 -0
- {corp_extractor-0.2.5.dist-info → corp_extractor-0.2.11.dist-info}/entry_points.txt +0 -0
{corp_extractor-0.2.5.dist-info → corp_extractor-0.2.11.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: corp-extractor
-Version: 0.2.5
+Version: 0.2.11
 Summary: Extract structured statements from text using T5-Gemma 2 and Diverse Beam Search
 Project-URL: Homepage, https://github.com/corp-o-rate/statement-extractor
 Project-URL: Documentation, https://github.com/corp-o-rate/statement-extractor#readme
@@ -27,7 +27,7 @@ Requires-Dist: click>=8.0.0
 Requires-Dist: numpy>=1.24.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: torch>=2.0.0
-Requires-Dist: transformers>=5.0.0
+Requires-Dist: transformers>=5.0.0rc3
 Provides-Extra: all
 Requires-Dist: sentence-transformers>=2.2.0; extra == 'all'
 Provides-Extra: dev
@@ -65,18 +65,26 @@ Extract structured subject-predicate-object statements from unstructured text us
 
 ```bash
 # Recommended: include embedding support for smart deduplication
-pip install corp-extractor[embeddings]
+pip install "corp-extractor[embeddings]"
 
 # Minimal installation (no embedding features)
 pip install corp-extractor
 ```
 
-**Note**: This package requires
+**Note**: This package requires `transformers>=5.0.0` (pre-release) for T5-Gemma2 model support. Install with `--pre` flag if needed:
+```bash
+pip install --pre "corp-extractor[embeddings]"
+```
 
 **For GPU support**, install PyTorch with CUDA first:
 ```bash
 pip install torch --index-url https://download.pytorch.org/whl/cu121
-pip install corp-extractor[embeddings]
+pip install "corp-extractor[embeddings]"
+```
+
+**For Apple Silicon (M1/M2/M3)**, MPS acceleration is automatically detected:
+```bash
+pip install "corp-extractor[embeddings]"  # MPS used automatically
 ```
 
 ## Quick Start
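A quick way to sanity-check an environment against these new requirements — a minimal sketch, assuming only that `torch` and `transformers` are importable (the package itself is not needed for this check):

```python
# Which device would "auto" pick, and is transformers new enough for T5-Gemma 2?
import torch
import transformers

if torch.cuda.is_available():
    device = "cuda"          # NVIDIA GPU
elif torch.backends.mps.is_available():
    device = "mps"           # Apple Silicon
else:
    device = "cpu"

print(f"auto device: {device}")
print(f"transformers: {transformers.__version__}")  # should satisfy >=5.0.0rc3
```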
@@ -105,13 +113,13 @@ For best results, install globally first:
 
 ```bash
 # Using uv (recommended)
-uv tool install corp-extractor[embeddings]
+uv tool install "corp-extractor[embeddings]"
 
 # Using pipx
-pipx install corp-extractor[embeddings]
+pipx install "corp-extractor[embeddings]"
 
 # Using pip
-pip install corp-extractor[embeddings]
+pip install "corp-extractor[embeddings]"
 
 # Then use anywhere
 corp-extractor "Your text here"
@@ -125,7 +133,7 @@ Run directly without installing using [uv](https://docs.astral.sh/uv/):
 uvx corp-extractor "Apple announced a new iPhone."
 ```
 
-**Note**:
+**Note**: First run downloads the model (~1.5GB) which may take a few minutes.
 
 ### Usage Examples
 
@@ -178,7 +186,7 @@ Options:
   --min-confidence FLOAT       Min confidence filter (default: 0)
   --taxonomy PATH              Load predicate taxonomy from file
   --taxonomy-threshold FLOAT   Taxonomy matching threshold (default: 0.5)
-  --device [auto|cuda|cpu]
+  --device [auto|cuda|mps|cpu] Device to use (default: auto)
   -v, --verbose                Show confidence scores and metadata
   -q, --quiet                  Suppress progress messages
   --version                    Show version
@@ -314,7 +322,7 @@ dict_output = extract_statements_as_dict(text)
 ```python
 from statement_extractor import StatementExtractor
 
-extractor = StatementExtractor(device="cuda")  # or "cpu"
+extractor = StatementExtractor(device="cuda")  # or "mps" (Apple Silicon) or "cpu"
 
 texts = ["Text 1...", "Text 2...", "Text 3..."]
 for text in texts:
corp_extractor-0.2.11.dist-info/RECORD
ADDED

@@ -0,0 +1,11 @@
+statement_extractor/__init__.py,sha256=MIZgn-lD9-XGJapzdyYxMhEJFRrTzftbRklrhwA4e8w,2967
+statement_extractor/canonicalization.py,sha256=ZMLs6RLWJa_rOJ8XZ7PoHFU13-zeJkOMDnvK-ZaFa5s,5991
+statement_extractor/cli.py,sha256=NIGCpqcnzF42B16RCiSu4kN0RlnVne2ZAT8341Znt1g,8558
+statement_extractor/extractor.py,sha256=r2gcCfZT43Q8STPuzaXmhbjWXTAs4JwMeAtCjQxlsIQ,25870
+statement_extractor/models.py,sha256=IE3TyIiOl2CINPMroQnGT12rSeQFR0bV3y4BJ79wLmI,10877
+statement_extractor/predicate_comparer.py,sha256=jcuaBi5BYqD3TKoyj3pR9dxtX5ihfDJvjdhEd2LHCwc,26184
+statement_extractor/scoring.py,sha256=xs0SxrV42QNBULQguU1-HhcCc-HnS-ekbcdx7FqWGVk,15663
+corp_extractor-0.2.11.dist-info/METADATA,sha256=D-fs9i9kn4v5bRAHCHxI3cq_6vosNgDCN7uuYwVZztM,13775
+corp_extractor-0.2.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+corp_extractor-0.2.11.dist-info/entry_points.txt,sha256=i0iKFqPIusvb-QTQ1zNnFgAqatgVah-jIhahbs5TToQ,115
+corp_extractor-0.2.11.dist-info/RECORD,,
statement_extractor/cli.py
CHANGED

@@ -7,11 +7,36 @@ Usage:
     cat input.txt | corp-extractor -
 """
 
+import logging
 import sys
 from typing import Optional
 
 import click
 
+
+def _configure_logging(verbose: bool) -> None:
+    """Configure logging for the extraction pipeline."""
+    level = logging.DEBUG if verbose else logging.WARNING
+
+    # Configure root logger for statement_extractor package
+    logging.basicConfig(
+        level=level,
+        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+        datefmt="%H:%M:%S",
+        stream=sys.stderr,
+        force=True,
+    )
+
+    # Set level for all statement_extractor loggers
+    for logger_name in [
+        "statement_extractor",
+        "statement_extractor.extractor",
+        "statement_extractor.scoring",
+        "statement_extractor.predicate_comparer",
+        "statement_extractor.canonicalization",
+    ]:
+        logging.getLogger(logger_name).setLevel(level)
+
 from . import __version__
 from .models import (
     ExtractionOptions,
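The `force=True` argument is what makes this safe to call from a CLI entry point: since Python 3.8, `logging.basicConfig(force=True)` removes any handlers already attached to the root logger before installing the new one, so repeated configuration never duplicates output. A minimal sketch of the same pattern in isolation (the `configure` helper below is illustrative, not part of the package):

```python
import logging
import sys

def configure(verbose: bool) -> None:
    # force=True (Python 3.8+) strips existing root handlers first,
    # so calling this repeatedly never stacks duplicate handlers.
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.WARNING,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        stream=sys.stderr,
        force=True,
    )

configure(verbose=False)
logging.getLogger("demo").debug("hidden")   # below WARNING, suppressed
configure(verbose=True)
logging.getLogger("demo").debug("visible")  # now emitted to stderr
```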
@@ -47,7 +72,7 @@ from .models import (
 @click.option("--taxonomy", type=click.Path(exists=True), help="Load predicate taxonomy from file (one per line)")
 @click.option("--taxonomy-threshold", type=float, default=0.5, help="Similarity threshold for taxonomy matching (default: 0.5)")
 # Device options
-@click.option("--device", type=click.Choice(["auto", "cuda", "cpu"]), default="auto", help="Device to use (default: auto)")
+@click.option("--device", type=click.Choice(["auto", "cuda", "mps", "cpu"]), default="auto", help="Device to use (default: auto)")
 # Output options
 @click.option("-v", "--verbose", is_flag=True, help="Show verbose output with confidence scores")
 @click.option("-q", "--quiet", is_flag=True, help="Suppress progress messages")
@@ -91,6 +116,9 @@ def main(
         json    JSON with full metadata
         xml     Raw XML from model
     """
+    # Configure logging based on verbose flag
+    _configure_logging(verbose)
+
     # Determine output format
     if output_json:
         output = "json"
@@ -135,6 +163,7 @@ def main(
         predicate_taxonomy=predicate_taxonomy,
         predicate_config=predicate_config,
         scoring_config=scoring_config,
+        verbose=verbose,
     )
 
     # Import here to allow --help without loading torch
@@ -160,6 +189,7 @@ def main(
         result = extractor.extract(input_text, options)
         _print_table(result, verbose)
     except Exception as e:
+        logging.exception("Error extracting statements:")
         raise click.ClickException(f"Extraction failed: {e}")
statement_extractor/extractor.py
CHANGED

@@ -80,11 +80,16 @@ class StatementExtractor:
 
         # Auto-detect device
         if device is None:
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            if torch.cuda.is_available():
+                self.device = "cuda"
+            elif torch.backends.mps.is_available():
+                self.device = "mps"
+            else:
+                self.device = "cpu"
         else:
             self.device = device
 
-        # Auto-detect dtype
+        # Auto-detect dtype (bfloat16 only for CUDA, float32 for MPS/CPU)
         if torch_dtype is None:
            self.torch_dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
         else:
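Pulled out of the hunk above, the device/dtype resolution is a small reusable pattern — a standalone sketch (the helper name `resolve_device_dtype` is illustrative, not part of the package API):

```python
import torch

def resolve_device_dtype(device: str | None = None,
                         dtype: torch.dtype | None = None) -> tuple[str, torch.dtype]:
    """Pick a device in preference order cuda > mps > cpu, then a matching dtype."""
    if device is None:
        if torch.cuda.is_available():
            device = "cuda"
        elif torch.backends.mps.is_available():
            device = "mps"
        else:
            device = "cpu"
    # bfloat16 is used only on CUDA; MPS and CPU fall back to float32
    if dtype is None:
        dtype = torch.bfloat16 if device == "cuda" else torch.float32
    return device, dtype

device, dtype = resolve_device_dtype()
print(device, dtype)  # e.g. "mps" torch.float32 on Apple Silicon
```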
@@ -175,6 +180,14 @@ class StatementExtractor:
         if options is None:
             options = ExtractionOptions()
 
+        logger.debug("=" * 60)
+        logger.debug("EXTRACTION STARTED")
+        logger.debug("=" * 60)
+        logger.debug(f"Input text length: {len(text)} chars")
+        logger.debug(f"Options: num_beams={options.num_beams}, diversity={options.diversity_penalty}")
+        logger.debug(f"  merge_beams={options.merge_beams}, embedding_dedup={options.embedding_dedup}")
+        logger.debug(f"  deduplicate={options.deduplicate}, max_new_tokens={options.max_new_tokens}")
+
         # Store original text for scoring
         original_text = text
 
@@ -185,6 +198,10 @@ class StatementExtractor:
         # Run extraction with retry logic
         statements = self._extract_with_scoring(text, original_text, options)
 
+        logger.debug("=" * 60)
+        logger.debug(f"EXTRACTION COMPLETE: {len(statements)} statements")
+        logger.debug("=" * 60)
+
         return ExtractionResult(
             statements=statements,
             source_text=original_text,
@@ -270,6 +287,10 @@ class StatementExtractor:
         4. Merges top beams or selects best beam
         5. Deduplicates using embeddings (if enabled)
         """
+        logger.debug("-" * 40)
+        logger.debug("PHASE 1: Tokenization")
+        logger.debug("-" * 40)
+
         # Tokenize input
         inputs = self.tokenizer(
             text,
@@ -278,48 +299,77 @@ class StatementExtractor:
             truncation=True,
         ).to(self.device)
 
+        input_ids = inputs["input_ids"]
+        logger.debug(f"Tokenized: {input_ids.shape[1]} tokens")
+
         # Count sentences for quality check
         num_sentences = self._count_sentences(text)
         min_expected = int(num_sentences * options.min_statement_ratio)
 
-        logger.
+        logger.debug(f"Input has ~{num_sentences} sentences, min expected: {min_expected}")
 
         # Get beam scorer
         beam_scorer = self._get_beam_scorer(options)
 
+        logger.debug("-" * 40)
+        logger.debug("PHASE 2: Diverse Beam Search Generation")
+        logger.debug("-" * 40)
+
         all_candidates: list[list[Statement]] = []
 
         for attempt in range(options.max_attempts):
+            logger.debug(f"Attempt {attempt + 1}/{options.max_attempts}: Generating {options.num_beams} beams...")
+
             # Generate candidate beams
             candidates = self._generate_candidate_beams(inputs, options)
+            logger.debug(f"  Generated {len(candidates)} valid XML outputs")
 
             # Parse each candidate to statements
             parsed_candidates = []
-            for xml_output in candidates:
+            for i, xml_output in enumerate(candidates):
                 statements = self._parse_xml_to_statements(xml_output)
                 if statements:
                     parsed_candidates.append(statements)
+                    logger.debug(f"  Beam {i}: {len(statements)} statements parsed")
+                else:
+                    logger.debug(f"  Beam {i}: 0 statements (parse failed)")
 
             all_candidates.extend(parsed_candidates)
 
             # Check if we have enough statements
             total_stmts = sum(len(c) for c in parsed_candidates)
-            logger.
+            logger.debug(f"  Total: {len(parsed_candidates)} beams, {total_stmts} statements")
 
             if total_stmts >= min_expected:
+                logger.debug(f"  Sufficient statements ({total_stmts} >= {min_expected}), stopping")
                 break
 
         if not all_candidates:
+            logger.debug("No valid candidates generated, returning empty result")
             return []
 
+        logger.debug("-" * 40)
+        logger.debug("PHASE 3: Beam Selection/Merging")
+        logger.debug("-" * 40)
+
         # Select or merge beams
         if options.merge_beams:
+            logger.debug(f"Merging {len(all_candidates)} beams...")
             statements = beam_scorer.merge_beams(all_candidates, original_text)
+            logger.debug(f"  After merge: {len(statements)} statements")
         else:
+            logger.debug(f"Selecting best beam from {len(all_candidates)} candidates...")
             statements = beam_scorer.select_best_beam(all_candidates, original_text)
+            logger.debug(f"  Selected beam has {len(statements)} statements")
+
+        logger.debug("-" * 40)
+        logger.debug("PHASE 4: Deduplication")
+        logger.debug("-" * 40)
 
         # Apply embedding-based deduplication if enabled
         if options.embedding_dedup and options.deduplicate:
+            logger.debug("Using embedding-based deduplication...")
+            pre_dedup_count = len(statements)
             try:
                 comparer = self._get_predicate_comparer(options)
                 if comparer:
@@ -327,14 +377,32 @@ class StatementExtractor:
                         statements,
                         entity_canonicalizer=options.entity_canonicalizer
                     )
+                    logger.debug(f"  After embedding dedup: {len(statements)} statements (removed {pre_dedup_count - len(statements)})")
+
                     # Also normalize predicates if taxonomy provided
                     if options.predicate_taxonomy or self._predicate_taxonomy:
+                        logger.debug("Normalizing predicates to taxonomy...")
                         statements = comparer.normalize_predicates(statements)
             except Exception as e:
                 logger.warning(f"Embedding deduplication failed, falling back to exact match: {e}")
                 statements = self._deduplicate_statements_exact(statements, options)
+                logger.debug(f"  After exact dedup: {len(statements)} statements")
         elif options.deduplicate:
+            logger.debug("Using exact text deduplication...")
+            pre_dedup_count = len(statements)
             statements = self._deduplicate_statements_exact(statements, options)
+            logger.debug(f"  After exact dedup: {len(statements)} statements (removed {pre_dedup_count - len(statements)})")
+        else:
+            logger.debug("Deduplication disabled")
+
+        # Log final statements
+        logger.debug("-" * 40)
+        logger.debug("FINAL STATEMENTS:")
+        logger.debug("-" * 40)
+        for i, stmt in enumerate(statements):
+            conf = f" (conf={stmt.confidence_score:.2f})" if stmt.confidence_score else ""
+            canonical = f" -> {stmt.canonical_predicate}" if stmt.canonical_predicate else ""
+            logger.debug(f"  {i+1}. {stmt.subject.text} --[{stmt.predicate}{canonical}]--> {stmt.object.text}{conf}")
 
         return statements
 
@@ -350,12 +418,16 @@ class StatementExtractor:
         outputs = self.model.generate(
             **inputs,
             max_new_tokens=options.max_new_tokens,
+            max_length=None,  # Override model default, use max_new_tokens only
             num_beams=num_seqs,
             num_beam_groups=num_seqs,
             num_return_sequences=num_seqs,
             diversity_penalty=options.diversity_penalty,
             do_sample=False,
+            top_p=None,  # Override model config to suppress warning
+            top_k=None,  # Override model config to suppress warning
             trust_remote_code=True,
+            custom_generate="transformers-community/group-beam-search",
         )
 
         # Decode and process candidates
statement_extractor/models.py
CHANGED

@@ -280,5 +280,11 @@ class ExtractionOptions(BaseModel):
         description="Use embedding similarity for predicate deduplication"
     )
 
+    # Verbose logging
+    verbose: bool = Field(
+        default=False,
+        description="Enable verbose logging for debugging"
+    )
+
     class Config:
         arbitrary_types_allowed = True  # Allow Callable type
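Taken together with the cli.py change above (which passes `verbose=verbose` into the options), the new field means verbosity can be requested per extraction from the Python API as well as via `-v` on the command line — a usage sketch, assuming only the names shown elsewhere in this diff:

```python
import logging

from statement_extractor import StatementExtractor
from statement_extractor.models import ExtractionOptions

logging.basicConfig(level=logging.DEBUG)  # surface the pipeline's debug logs

extractor = StatementExtractor()  # device auto-detected: cuda > mps > cpu
options = ExtractionOptions(verbose=True)

result = extractor.extract("Apple announced a new iPhone.", options)
for stmt in result.statements:
    print(stmt.subject.text, stmt.predicate, stmt.object.text)
```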
statement_extractor/predicate_comparer.py
CHANGED

@@ -83,7 +83,12 @@ class PredicateComparer:
         # Auto-detect device
         if device is None:
             import torch
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            if torch.cuda.is_available():
+                self.device = "cuda"
+            elif torch.backends.mps.is_available():
+                self.device = "mps"
+            else:
+                self.device = "cpu"
         else:
             self.device = device
 
@@ -289,6 +294,8 @@ class PredicateComparer:
         Returns:
             Deduplicated list of statements (keeps best contextualized match)
         """
+        logger.debug(f"Embedding deduplication: {len(statements)} statements, detect_reversals={detect_reversals}")
+
         if len(statements) <= 1:
             return statements
 
@@ -297,27 +304,33 @@ class PredicateComparer:
                 return entity_canonicalizer(text)
             return text.lower().strip()
 
+        logger.debug("  Computing predicate embeddings...")
         # Compute all predicate embeddings at once for efficiency
         predicates = [s.predicate for s in statements]
         pred_embeddings = self._compute_embeddings(predicates)
+        logger.debug(f"  Computed {len(pred_embeddings)} predicate embeddings")
 
+        logger.debug("  Computing contextualized embeddings (S P O)...")
         # Compute contextualized embeddings: "Subject Predicate Object" for each statement
         contextualized_texts = [
             f"{s.subject.text} {s.predicate} {s.object.text}" for s in statements
         ]
         contextualized_embeddings = self._compute_embeddings(contextualized_texts)
 
+        logger.debug("  Computing reversed embeddings (O P S)...")
         # Compute reversed contextualized embeddings: "Object Predicate Subject"
         reversed_texts = [
             f"{s.object.text} {s.predicate} {s.subject.text}" for s in statements
         ]
         reversed_embeddings = self._compute_embeddings(reversed_texts)
 
+        logger.debug("  Computing source text embeddings...")
         # Compute source text embeddings for scoring which duplicate to keep
         source_embeddings = []
         for stmt in statements:
             source_text = stmt.source_text or f"{stmt.subject.text} {stmt.predicate} {stmt.object.text}"
             source_embeddings.append(self._compute_embeddings([source_text])[0])
+        logger.debug("  All embeddings computed, starting comparison loop...")
 
         unique_statements: list[Statement] = []
         unique_pred_embeddings: list[np.ndarray] = []
@@ -358,9 +371,17 @@ class PredicateComparer:
             if similarity >= self.config.dedup_threshold:
                 duplicate_idx = j
                 is_reversed_match = reversed_match and not direct_match
+                match_type = "reversed" if is_reversed_match else "direct"
+                logger.debug(
+                    f"  [{i}] DUPLICATE of [{unique_indices[j]}] ({match_type}, sim={similarity:.3f}): "
+                    f"'{stmt.subject.text}' --[{stmt.predicate}]--> '{stmt.object.text}'"
+                )
                 break
 
         if duplicate_idx is None:
+            logger.debug(
+                f"  [{i}] UNIQUE: '{stmt.subject.text}' --[{stmt.predicate}]--> '{stmt.object.text}'"
+            )
             # Not a duplicate - add to unique list
             unique_statements.append(stmt)
             unique_pred_embeddings.append(pred_embeddings[i])
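The loop above is a greedy nearest-duplicate scan: each statement's contextualized "S P O" embedding is compared against the already-accepted unique statements, and the reversed "O P S" embedding lets it catch the same fact stated in the opposite direction. A self-contained sketch of that idea using sentence-transformers and cosine similarity (the model choice and 0.85 threshold are illustrative, not the package's configuration):

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # illustrative model choice

def cos(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

triples = [
    ("Apple", "announced", "a new iPhone"),
    ("a new iPhone", "was announced by", "Apple"),  # reversed restatement
    ("Apple", "hired", "a new CEO"),
]
forward = model.encode([f"{s} {p} {o}" for s, p, o in triples])    # "S P O"
backward = model.encode([f"{o} {p} {s}" for s, p, o in triples])   # "O P S"

unique: list[int] = []
for i in range(len(triples)):
    # Duplicate if it matches an accepted triple directly OR in reverse
    dup = any(
        max(cos(forward[i], forward[j]), cos(backward[i], forward[j])) >= 0.85
        for j in unique
    )
    if not dup:
        unique.append(i)

# With a well-tuned threshold the reversed restatement is dropped
print([triples[i] for i in unique])
```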
@@ -451,6 +472,7 @@ class PredicateComparer:
                 merged_stmt = existing_stmt.merge_entity_types_from(stmt)
                 unique_statements[duplicate_idx] = merged_stmt
 
+        logger.debug(f"  Deduplication complete: {len(statements)} -> {len(unique_statements)} statements")
         return unique_statements
 
     def normalize_predicates(
statement_extractor/scoring.py
CHANGED

@@ -6,10 +6,13 @@ Provides:
 - BeamScorer: Score and select/merge beams based on quality metrics
 """
 
+import logging
 from typing import Optional
 
 from .models import ScoringConfig, Statement
 
+logger = logging.getLogger(__name__)
+
 
 class TripleScorer:
     """
@@ -32,6 +35,7 @@ class TripleScorer:
         Higher scores indicate better grounding in source text.
         """
         if not source_text:
+            logger.debug(f"  No source text, returning neutral score 0.5")
             return 0.5  # Neutral score if no source text
 
         score = 0.0
@@ -53,6 +57,7 @@ class TripleScorer:
             weights_sum += 0.2
 
         # Check proximity - subject and object in same/nearby region (weight: 0.2)
+        proximity_score = 0.0
         if subject_found and object_found:
             proximity_score = self._compute_proximity(
                 statement.subject.text,
@@ -62,7 +67,14 @@ class TripleScorer:
             score += 0.2 * proximity_score
             weights_sum += 0.2
 
-        return score / weights_sum if weights_sum > 0 else 0.0
+        final_score = score / weights_sum if weights_sum > 0 else 0.0
+
+        logger.debug(
+            f"  Score for '{statement.subject.text}' --[{statement.predicate}]--> '{statement.object.text}': "
+            f"{final_score:.2f} (subj={subject_found}, obj={object_found}, pred={predicate_grounded}, prox={proximity_score:.2f})"
+        )
+
+        return final_score
 
     def find_evidence_span(
         self,
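The scoring pattern here is a weighted average over whichever checks actually ran: each check adds `weight * check_score` to `score` and `weight` to `weights_sum`, and the final division renormalizes so a statement is not punished for checks that were skipped. A toy sketch of the same pattern — only the 0.2 predicate and proximity weights appear in this diff, so the 0.3 subject/object weights below are assumptions:

```python
def grounding_score(subject_found: bool, object_found: bool,
                    predicate_grounded: bool, proximity: float) -> float:
    """Weighted average of grounding checks, renormalized by the weights used."""
    score = weights_sum = 0.0
    score += 0.3 * subject_found                     # assumed weight
    weights_sum += 0.3
    score += 0.3 * object_found                      # assumed weight
    weights_sum += 0.3
    score += 0.2 * predicate_grounded                # weight shown in diff
    weights_sum += 0.2
    if subject_found and object_found:               # proximity only when both found
        score += 0.2 * proximity
        weights_sum += 0.2
    return score / weights_sum if weights_sum > 0 else 0.0

# Both entities found, predicate grounded, entities fairly close together:
print(round(grounding_score(True, True, True, proximity=0.8), 2))  # 0.96
```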
@@ -347,10 +359,12 @@ class BeamScorer:
             return []
 
         top_n = top_n or self.config.merge_top_n
+        logger.debug(f"Merging beams: {len(candidates)} candidates, selecting top {top_n}")
 
         # Score each beam
         scored_beams = []
-        for beam in candidates:
+        for i, beam in enumerate(candidates):
+            logger.debug(f"  Scoring beam {i} ({len(beam)} statements)...")
             for stmt in beam:
                 if stmt.confidence_score is None:
                     stmt.confidence_score = self.triple_scorer.score_triple(stmt, source_text)
@@ -359,31 +373,36 @@ class BeamScorer:
 
             beam_score = self.score_beam(beam, source_text)
             scored_beams.append((beam_score, beam))
+            logger.debug(f"  Beam {i} score: {beam_score:.3f}")
 
         # Sort and take top N
         scored_beams.sort(key=lambda x: x[0], reverse=True)
         top_beams = [beam for _, beam in scored_beams[:top_n]]
+        logger.debug(f"  Selected top {len(top_beams)} beams")
 
         # Pool all triples
         all_statements: list[Statement] = []
         for beam in top_beams:
             all_statements.extend(beam)
+        logger.debug(f"  Pooled {len(all_statements)} statements from top beams")
 
         # Filter by confidence threshold
         min_conf = self.config.min_confidence
         filtered = [s for s in all_statements if (s.confidence_score or 0) >= min_conf]
+        logger.debug(f"  After confidence filter (>={min_conf}): {len(filtered)} statements")
 
-        # Filter out statements where source_text doesn't support the predicate
-        # This catches model hallucinations where predicate doesn't match the evidence
-        consistent = [
-            s for s in filtered
-            if self._source_text_supports_predicate(s)
-        ]
+        # # Filter out statements where source_text doesn't support the predicate
+        # # This catches model hallucinations where predicate doesn't match the evidence
+        # consistent = [
+        #     s for s in filtered
+        #     if self._source_text_supports_predicate(s)
+        # ]
+        # logger.debug(f"  After predicate consistency filter: {len(consistent)} statements")
 
         # Deduplicate - keep highest confidence for each (subject, predicate, object)
         # Note: Same subject+predicate with different objects is valid (e.g., "Apple announced X and Y")
         seen: dict[tuple[str, str, str], Statement] = {}
-        for stmt in consistent:
+        for stmt in all_statements:
             key = (
                 stmt.subject.text.lower(),
                 stmt.predicate.lower(),
@@ -392,7 +411,10 @@ class BeamScorer:
             if key not in seen or (stmt.confidence_score or 0) > (seen[key].confidence_score or 0):
                 seen[key] = stmt
 
-        return list(seen.values())
+        result = list(seen.values())
+        logger.debug(f"  After deduplication: {len(result)} unique statements")
+
+        return result
 
     def _source_text_supports_predicate(self, stmt: Statement) -> bool:
         """
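As a whole, `merge_beams` is: score every beam, keep the top N, pool their statements, then collapse exact (subject, predicate, object) duplicates keeping the highest-confidence copy. A compact sketch of that pool-and-dedup step, with plain tuples standing in for the package's Statement objects and average confidence as a stand-in beam score:

```python
# Each beam is a list of (subject, predicate, object, confidence) tuples.
beams = [
    [("Apple", "announced", "a new iPhone", 0.9)],
    [("Apple", "announced", "a new iPhone", 0.7),
     ("Apple", "hired", "a new CEO", 0.8)],
]

def beam_score(beam):  # stand-in metric: average confidence
    return sum(conf for *_, conf in beam) / len(beam)

# 1. Score beams and keep the top N
top_n = 2
top = sorted(beams, key=beam_score, reverse=True)[:top_n]

# 2. Pool statements, then keep the best copy per (s, p, o) key
seen = {}
for s, p, o, conf in (stmt for beam in top for stmt in beam):
    key = (s.lower(), p.lower(), o.lower())
    if key not in seen or conf > seen[key][3]:
        seen[key] = (s, p, o, conf)

print(list(seen.values()))
# [('Apple', 'announced', 'a new iPhone', 0.9), ('Apple', 'hired', 'a new CEO', 0.8)]
```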
corp_extractor-0.2.5.dist-info/RECORD
DELETED

@@ -1,11 +0,0 @@
-statement_extractor/__init__.py,sha256=MIZgn-lD9-XGJapzdyYxMhEJFRrTzftbRklrhwA4e8w,2967
-statement_extractor/canonicalization.py,sha256=ZMLs6RLWJa_rOJ8XZ7PoHFU13-zeJkOMDnvK-ZaFa5s,5991
-statement_extractor/cli.py,sha256=kJnZm_mbq4np1vTxSjczMZM5zGuDlC8Z5xLJd8O3xZ4,7605
-statement_extractor/extractor.py,sha256=PX0SiJnYUnh06seyH5W77FcPpcvLXwEM8IGsuVuRh0Q,22158
-statement_extractor/models.py,sha256=xDF3pDPhIiqiMwFMPV94aBEgZGbSe-x2TkshahOiCog,10739
-statement_extractor/predicate_comparer.py,sha256=iwBfNJFNOFv8ODKN9F9EtmknpCeSThOpnu6P_PJSmgE,24898
-statement_extractor/scoring.py,sha256=Wa1BW6jXtHD7dZkUXwdwE39hwFo2ko6BuIogBc4E2Lk,14493
-corp_extractor-0.2.5.dist-info/METADATA,sha256=iN_MPbqHhizaFAGJKzR5JNSbDivrS133oSTiYWrFht4,13552
-corp_extractor-0.2.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-corp_extractor-0.2.5.dist-info/entry_points.txt,sha256=i0iKFqPIusvb-QTQ1zNnFgAqatgVah-jIhahbs5TToQ,115
-corp_extractor-0.2.5.dist-info/RECORD,,
{corp_extractor-0.2.5.dist-info → corp_extractor-0.2.11.dist-info}/WHEEL
File without changes

{corp_extractor-0.2.5.dist-info → corp_extractor-0.2.11.dist-info}/entry_points.txt
File without changes