renard-pipeline 0.6.3__tar.gz → 0.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of renard-pipeline might be problematic. Click here for more details.

Files changed (57)
  1. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/PKG-INFO +23 -29
  2. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/README.md +2 -2
  3. renard_pipeline-0.6.5/pyproject.toml +39 -0
  4. renard_pipeline-0.6.5/renard_pipeline.egg-info/PKG-INFO +117 -0
  5. renard_pipeline-0.6.5/renard_pipeline.egg-info/SOURCES.txt +52 -0
  6. renard_pipeline-0.6.5/renard_pipeline.egg-info/dependency_links.txt +1 -0
  7. renard_pipeline-0.6.5/renard_pipeline.egg-info/requires.txt +14 -0
  8. renard_pipeline-0.6.5/renard_pipeline.egg-info/top_level.txt +1 -0
  9. renard_pipeline-0.6.5/setup.cfg +4 -0
  10. renard_pipeline-0.6.5/tests/test_character_unification.py +16 -0
  11. renard_pipeline-0.6.5/tests/test_corefs.py +15 -0
  12. renard_pipeline-0.6.5/tests/test_graph_extraction.py +121 -0
  13. renard_pipeline-0.6.5/tests/test_graph_utils.py +14 -0
  14. renard_pipeline-0.6.5/tests/test_ner.py +79 -0
  15. renard_pipeline-0.6.5/tests/test_pipeline.py +92 -0
  16. renard_pipeline-0.6.5/tests/test_quote_detection.py +38 -0
  17. renard_pipeline-0.6.5/tests/test_sentiment_analysis.py +14 -0
  18. renard_pipeline-0.6.5/tests/test_stanza.py +20 -0
  19. renard_pipeline-0.6.5/tests/test_tokenization.py +26 -0
  20. renard_pipeline-0.6.5/tests/test_utils.py +15 -0
  21. renard_pipeline-0.6.3/pyproject.toml +0 -40
  22. renard_pipeline-0.6.3/renard/resources/hypocorisms/datas/License.txt +0 -201
  23. renard_pipeline-0.6.3/renard/resources/hypocorisms/datas/hypocorisms.csv +0 -1084
  24. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/LICENSE +0 -0
  25. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/gender.py +0 -0
  26. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/graph_utils.py +0 -0
  27. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/ner_utils.py +0 -0
  28. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/nltk_utils.py +0 -0
  29. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/__init__.py +0 -0
  30. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/character_unification.py +0 -0
  31. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/characters_extraction.py +0 -0
  32. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/core.py +0 -0
  33. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/corefs/__init__.py +0 -0
  34. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/corefs/corefs.py +0 -0
  35. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/graph_extraction.py +0 -0
  36. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/ner/__init__.py +0 -0
  37. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/ner/ner.py +0 -0
  38. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/ner/retrieval.py +0 -0
  39. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/preconfigured.py +0 -0
  40. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/preprocessing.py +0 -0
  41. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/progress.py +0 -0
  42. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/quote_detection.py +0 -0
  43. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/sentiment_analysis.py +0 -0
  44. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/speaker_attribution.py +0 -0
  45. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/stanford_corenlp.py +0 -0
  46. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/pipeline/tokenization.py +0 -0
  47. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/plot_utils.py +0 -0
  48. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/py.typed +0 -0
  49. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/resources/determiners/__init__.py +0 -0
  50. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/resources/determiners/determiners.py +0 -0
  51. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/resources/hypocorisms/__init__.py +0 -0
  52. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/resources/hypocorisms/hypocorisms.py +0 -0
  53. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/resources/pronouns/__init__.py +0 -0
  54. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/resources/pronouns/pronouns.py +0 -0
  55. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/resources/titles/__init__.py +0 -0
  56. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/resources/titles/titles.py +0 -0
  57. {renard_pipeline-0.6.3 → renard_pipeline-0.6.5}/renard/utils.py +0 -0
@@ -1,35 +1,30 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: renard-pipeline
3
- Version: 0.6.3
3
+ Version: 0.6.5
4
4
  Summary: Relationships Extraction from NARrative Documents
5
- Home-page: https://github.com/CompNet/Renard
5
+ Author-email: Arthur Amalvy <arthur.amalvy@univ-avignon.fr>
6
6
  License: GPL-3.0-only
7
- Author: Arthur Amalvy
8
- Author-email: arthur.amalvy@univ-avignon.fr
9
- Requires-Python: >=3.8,<3.12
10
- Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
11
- Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.8
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Requires-Dist: datasets (>=3.0,<4.0)
17
- Requires-Dist: grimbert (>=0.1,<0.2)
18
- Requires-Dist: matplotlib (>=3.5,<4.0)
19
- Requires-Dist: more-itertools (>=10.5,<11.0)
20
- Requires-Dist: nameparser (>=1.1,<2.0)
21
- Requires-Dist: networkx (>=3.0,<4.0)
22
- Requires-Dist: nltk (>=3.9,<4.0)
23
- Requires-Dist: pandas (>=2.0,<3.0)
24
- Requires-Dist: pytest (>=8.3.0,<9.0.0)
25
- Requires-Dist: rank-bm25 (>=0.2.2,<0.3.0)
26
- Requires-Dist: tibert (>=0.5,<0.6)
27
- Requires-Dist: torch (>=2.0.0,!=2.0.1)
28
- Requires-Dist: tqdm (>=4.62.3,<5.0.0)
29
- Requires-Dist: transformers (>=4.37,<5.0)
7
+ Project-URL: Homepage, https://github.com/CompNet/Renard
30
8
  Project-URL: Documentation, https://compnet.github.io/Renard/
31
9
  Project-URL: Repository, https://github.com/CompNet/Renard
10
+ Requires-Python: <3.13,>=3.9
32
11
  Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: torch!=2.0.1,>=2.0.0
14
+ Requires-Dist: transformers>=4.37
15
+ Requires-Dist: nltk>=3.9
16
+ Requires-Dist: tqdm>=4.62
17
+ Requires-Dist: networkx>=3.0
18
+ Requires-Dist: more-itertools>=10.5
19
+ Requires-Dist: nameparser>=1.1
20
+ Requires-Dist: matplotlib>=3.5
21
+ Requires-Dist: pandas>=2.0
22
+ Requires-Dist: pytest>=8.3.0
23
+ Requires-Dist: tibert>=0.5
24
+ Requires-Dist: grimbert>=0.1
25
+ Requires-Dist: datasets>=3.0
26
+ Requires-Dist: rank-bm25>=0.2.2
27
+ Dynamic: license-file
33
28
 
34
29
  # Renard
35
30
 
@@ -46,7 +41,7 @@ You can install the latest version using pip:
46
41
 
47
42
  > pip install renard-pipeline
48
43
 
49
- Currently, Renard supports Python>=3.8,<=3.11
44
+ Currently, Renard supports Python>=3.9,<=3.12
50
45
 
51
46
 
52
47
  # Documentation
@@ -90,7 +85,7 @@ For more information, see `renard_tutorial.py`, which is a tutorial in the `jupy
90
85
 
91
86
  `Renard` uses `pytest` for testing. To launch tests, use the following command :
92
87
 
93
- > poetry run python -m pytest tests
88
+ > uv run python -m pytest tests
94
89
 
95
90
  Expensive tests are disabled by default. These can be run by setting the environment variable `RENARD_TEST_ALL` to `1`.
96
91
 
@@ -120,4 +115,3 @@ If you use Renard in your research project, please cite it as follows:
120
115
  ```
121
116
 
122
117
  We would be happy to hear about your usage of Renard, so don't hesitate to reach out!
123
-
@@ -13,7 +13,7 @@ You can install the latest version using pip:
13
13
 
14
14
  > pip install renard-pipeline
15
15
 
16
- Currently, Renard supports Python>=3.8,<=3.11
16
+ Currently, Renard supports Python>=3.9,<=3.12
17
17
 
18
18
 
19
19
  # Documentation
@@ -57,7 +57,7 @@ For more information, see `renard_tutorial.py`, which is a tutorial in the `jupy
57
57
 
58
58
  `Renard` uses `pytest` for testing. To launch tests, use the following command :
59
59
 
60
- > poetry run python -m pytest tests
60
+ > uv run python -m pytest tests
61
61
 
62
62
  Expensive tests are disabled by default. These can be run by setting the environment variable `RENARD_TEST_ALL` to `1`.
63
63
 
@@ -0,0 +1,39 @@
1
+ [project]
2
+ name = "renard-pipeline"
3
+ version = "0.6.5"
4
+ description = "Relationships Extraction from NARrative Documents"
5
+ authors = [
6
+ {name = "Arthur Amalvy", email = "arthur.amalvy@univ-avignon.fr"},
7
+ ]
8
+ license = { text = "GPL-3.0-only" }
9
+ readme = "README.md"
10
+ requires-python = ">=3.9,<3.13"
11
+ dependencies = [
12
+ "torch>=2.0.0,!=2.0.1",
13
+ "transformers>=4.37",
14
+ "nltk>=3.9",
15
+ "tqdm>=4.62",
16
+ "networkx>=3.0",
17
+ "more-itertools>=10.5",
18
+ "nameparser>=1.1",
19
+ "matplotlib>=3.5",
20
+ "pandas>=2.0",
21
+ "pytest>=8.3.0",
22
+ "tibert>=0.5",
23
+ "grimbert>=0.1",
24
+ "datasets>=3.0",
25
+ "rank-bm25>=0.2.2",
26
+ ]
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/CompNet/Renard"
30
+ Documentation = "https://compnet.github.io/Renard/"
31
+ Repository = "https://github.com/CompNet/Renard"
32
+
33
+ [dependency-groups]
34
+ dev = [
35
+ "hypothesis>=6.82",
36
+ "Sphinx>=4.3",
37
+ "sphinx-rtd-theme>=1.0.0",
38
+ "sphinx-autodoc-typehints>=1.12.0",
39
+ ]
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: renard-pipeline
3
+ Version: 0.6.5
4
+ Summary: Relationships Extraction from NARrative Documents
5
+ Author-email: Arthur Amalvy <arthur.amalvy@univ-avignon.fr>
6
+ License: GPL-3.0-only
7
+ Project-URL: Homepage, https://github.com/CompNet/Renard
8
+ Project-URL: Documentation, https://compnet.github.io/Renard/
9
+ Project-URL: Repository, https://github.com/CompNet/Renard
10
+ Requires-Python: <3.13,>=3.9
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: torch!=2.0.1,>=2.0.0
14
+ Requires-Dist: transformers>=4.37
15
+ Requires-Dist: nltk>=3.9
16
+ Requires-Dist: tqdm>=4.62
17
+ Requires-Dist: networkx>=3.0
18
+ Requires-Dist: more-itertools>=10.5
19
+ Requires-Dist: nameparser>=1.1
20
+ Requires-Dist: matplotlib>=3.5
21
+ Requires-Dist: pandas>=2.0
22
+ Requires-Dist: pytest>=8.3.0
23
+ Requires-Dist: tibert>=0.5
24
+ Requires-Dist: grimbert>=0.1
25
+ Requires-Dist: datasets>=3.0
26
+ Requires-Dist: rank-bm25>=0.2.2
27
+ Dynamic: license-file
28
+
29
+ # Renard
30
+
31
+ [![DOI](https://joss.theoj.org/papers/10.21105/joss.06574/status.svg)](https://doi.org/10.21105/joss.06574)
32
+
33
+ Renard (Relationship Extraction from NARrative Documents) is a library for creating and using custom character networks extraction pipelines. Renard can extract dynamic as well as static character networks.
34
+
35
+ ![The Renard logo](./docs/renard.svg)
36
+
37
+
38
+ # Installation
39
+
40
+ You can install the latest version using pip:
41
+
42
+ > pip install renard-pipeline
43
+
44
+ Currently, Renard supports Python>=3.9,<=3.12
45
+
46
+
47
+ # Documentation
48
+
49
+ Documentation, including installation instructions, can be found at https://compnet.github.io/Renard/
50
+
51
+ If you need local documentation, it can be generated using `Sphinx`. From the `docs` directory, `make html` should create documentation under `docs/_build/html`.
52
+
53
+
54
+ # Tutorial
55
+
56
+ Renard's central concept is the `Pipeline`. A `Pipeline` is a list of `PipelineStep` that are run sequentially in order to extract a character graph from a document. Here is a simple example:
57
+
58
+ ```python
59
+ from renard.pipeline import Pipeline
60
+ from renard.pipeline.tokenization import NLTKTokenizer
61
+ from renard.pipeline.ner import NLTKNamedEntityRecognizer
62
+ from renard.pipeline.character_unification import GraphRulesCharacterUnifier
63
+ from renard.pipeline.graph_extraction import CoOccurrencesGraphExtractor
64
+
65
+ with open("./my_doc.txt") as f:
66
+ text = f.read()
67
+
68
+ pipeline = Pipeline(
69
+ [
70
+ NLTKTokenizer(),
71
+ NLTKNamedEntityRecognizer(),
72
+ GraphRulesCharacterUnifier(min_appearance=10),
73
+ CoOccurrencesGraphExtractor(co_occurrences_dist=25)
74
+ ]
75
+ )
76
+
77
+ out = pipeline(text)
78
+ ```
79
+
80
+ For more information, see `renard_tutorial.py`, which is a tutorial in the `jupytext` format. You can open it as a notebook in Jupyter Notebook (or export it as a notebook with `jupytext --to ipynb renard_tutorial.py`).
81
+
82
+
83
+
84
+ # Running tests
85
+
86
+ `Renard` uses `pytest` for testing. To launch tests, use the following command:
87
+
88
+ > uv run python -m pytest tests
89
+
90
+ Expensive tests are disabled by default. These can be run by setting the environment variable `RENARD_TEST_ALL` to `1`.
91
+
92
+
93
+ # Contributing
94
+
95
+ See [the "Contributing" section of the documentation](https://compnet.github.io/Renard/contributing.html).
96
+
97
+
98
+ # How to cite
99
+
100
+ If you use Renard in your research project, please cite it as follows:
101
+
102
+ ```bibtex
103
+ @Article{Amalvy2024,
104
+ doi = {10.21105/joss.06574},
105
+ year = {2024},
106
+ publisher = {The Open Journal},
107
+ volume = {9},
108
+ number = {98},
109
+ pages = {6574},
110
+ author = {Amalvy, A. and Labatut, V. and Dufour, R.},
111
+ title = {Renard: A Modular Pipeline for Extracting Character
112
+ Networks from Narrative Texts},
113
+ journal = {Journal of Open Source Software},
114
+ }
115
+ ```
116
+
117
+ We would be happy to hear about your usage of Renard, so don't hesitate to reach out!
@@ -0,0 +1,52 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ renard/gender.py
5
+ renard/graph_utils.py
6
+ renard/ner_utils.py
7
+ renard/nltk_utils.py
8
+ renard/plot_utils.py
9
+ renard/py.typed
10
+ renard/utils.py
11
+ renard/pipeline/__init__.py
12
+ renard/pipeline/character_unification.py
13
+ renard/pipeline/characters_extraction.py
14
+ renard/pipeline/core.py
15
+ renard/pipeline/graph_extraction.py
16
+ renard/pipeline/preconfigured.py
17
+ renard/pipeline/preprocessing.py
18
+ renard/pipeline/progress.py
19
+ renard/pipeline/quote_detection.py
20
+ renard/pipeline/sentiment_analysis.py
21
+ renard/pipeline/speaker_attribution.py
22
+ renard/pipeline/stanford_corenlp.py
23
+ renard/pipeline/tokenization.py
24
+ renard/pipeline/corefs/__init__.py
25
+ renard/pipeline/corefs/corefs.py
26
+ renard/pipeline/ner/__init__.py
27
+ renard/pipeline/ner/ner.py
28
+ renard/pipeline/ner/retrieval.py
29
+ renard/resources/determiners/__init__.py
30
+ renard/resources/determiners/determiners.py
31
+ renard/resources/hypocorisms/__init__.py
32
+ renard/resources/hypocorisms/hypocorisms.py
33
+ renard/resources/pronouns/__init__.py
34
+ renard/resources/pronouns/pronouns.py
35
+ renard/resources/titles/__init__.py
36
+ renard/resources/titles/titles.py
37
+ renard_pipeline.egg-info/PKG-INFO
38
+ renard_pipeline.egg-info/SOURCES.txt
39
+ renard_pipeline.egg-info/dependency_links.txt
40
+ renard_pipeline.egg-info/requires.txt
41
+ renard_pipeline.egg-info/top_level.txt
42
+ tests/test_character_unification.py
43
+ tests/test_corefs.py
44
+ tests/test_graph_extraction.py
45
+ tests/test_graph_utils.py
46
+ tests/test_ner.py
47
+ tests/test_pipeline.py
48
+ tests/test_quote_detection.py
49
+ tests/test_sentiment_analysis.py
50
+ tests/test_stanza.py
51
+ tests/test_tokenization.py
52
+ tests/test_utils.py
@@ -0,0 +1,14 @@
1
+ torch!=2.0.1,>=2.0.0
2
+ transformers>=4.37
3
+ nltk>=3.9
4
+ tqdm>=4.62
5
+ networkx>=3.0
6
+ more-itertools>=10.5
7
+ nameparser>=1.1
8
+ matplotlib>=3.5
9
+ pandas>=2.0
10
+ pytest>=8.3.0
11
+ tibert>=0.5
12
+ grimbert>=0.1
13
+ datasets>=3.0
14
+ rank-bm25>=0.2.2
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,16 @@
1
+ from renard.pipeline import Mention
2
+ from renard.pipeline.character_unification import (
3
+ Character,
4
+ _assign_coreference_mentions,
5
+ )
6
+
7
+
8
+ def test_assign_coreference_mentions():
9
+ characters = _assign_coreference_mentions(
10
+ [Character(frozenset(["John Traitor"]), [Mention(["John", "Traitor"], 0, 1)])],
11
+ [[Mention(["John", "Traitor"], 0, 1), Mention(["He"], 10, 10)]],
12
+ )
13
+ assert characters[0] == Character(
14
+ frozenset(["John Traitor"]),
15
+ [Mention(["John", "Traitor"], 0, 1), Mention(["He"], 10, 10)],
16
+ )
@@ -0,0 +1,15 @@
1
+ import os
2
+ import pytest
3
+ from renard.pipeline import Pipeline
4
+ from renard.pipeline.corefs import BertCoreferenceResolver
5
+
6
+
7
+ @pytest.mark.skipif(os.getenv("RENARD_TEST_ALL") != "1", reason="performance")
8
+ def test_bert_coreference_resolver_runs():
9
+ pipeline = Pipeline([BertCoreferenceResolver()], progress_report=None)
10
+ tokens = "Princess Liana felt sad , because Zarth Arn was gone . The princess went to sleep .".split(
11
+ " "
12
+ )
13
+ corefs = pipeline(tokens=tokens).corefs
14
+ assert not corefs is None
15
+ assert len(corefs) >= 1
@@ -0,0 +1,121 @@
1
+ from collections import defaultdict
2
+ from typing import List
3
+ import itertools, string
4
+ from hypothesis import given
5
+ from hypothesis.strategies import lists, sampled_from
6
+ from hypothesis.strategies._internal.numbers import integers
7
+ import networkx as nx
8
+ from networkx.algorithms import isomorphism
9
+ from renard.pipeline.graph_extraction import CoOccurrencesGraphExtractor
10
+ from renard.pipeline.character_unification import Character
11
+ from renard.pipeline.ner import ner_entities, NEREntity
12
+
13
+
14
+ def _characters_from_mentions(mentions: List[NEREntity]) -> List[Character]:
15
+ """Generate characters from a list of mentions"""
16
+ name_to_mentions = defaultdict(list)
17
+ for mention in mentions:
18
+ name_to_mentions[" ".join(mention.tokens)].append(mention)
19
+ return [
20
+ Character(frozenset((name,)), mentions)
21
+ for name, mentions in name_to_mentions.items()
22
+ ]
23
+
24
+
25
+ # max size used for performance reasons
26
+ @given(lists(sampled_from(string.ascii_uppercase), max_size=7))
27
+ def test_basic_graph_extraction(tokens: List[str]):
28
+ bio_tags = ["B-PER" for _ in tokens]
29
+
30
+ mentions = ner_entities(tokens, bio_tags)
31
+ characters = _characters_from_mentions(mentions)
32
+
33
+ graph_extractor = CoOccurrencesGraphExtractor(len(tokens))
34
+ out = graph_extractor(set(characters), [tokens])
35
+
36
+ characters = {
37
+ token: Character(
38
+ frozenset([token]), [m for m in mentions if m.tokens[0] == token]
39
+ )
40
+ for token in set(tokens)
41
+ }
42
+
43
+ G = nx.Graph()
44
+ for character in characters.values():
45
+ G.add_node(character)
46
+
47
+ for i, j in itertools.combinations(range(len(tokens)), 2):
48
+ A = characters[tokens[i]]
49
+ B = characters[tokens[j]]
50
+ if A == B:
51
+ continue
52
+ if not G.has_edge(A, B):
53
+ G.add_edge(A, B, weight=0)
54
+ G.edges[A, B]["weight"] += 1
55
+
56
+ assert nx.is_isomorphic(
57
+ out["character_network"],
58
+ G,
59
+ edge_match=isomorphism.numerical_edge_match("weight", 0),
60
+ )
61
+
62
+
63
+ @given(
64
+ lists(sampled_from(string.ascii_uppercase), min_size=1),
65
+ integers(min_value=1, max_value=5),
66
+ )
67
+ def test_dynamic_graph_extraction(tokens: List[str], dynamic_window: int):
68
+ """
69
+ .. note::
70
+
71
+ only tests execution.
72
+ """
73
+ bio_tags = ["B-PER" for _ in tokens]
74
+
75
+ mentions = ner_entities(tokens, bio_tags)
76
+ characters = _characters_from_mentions(mentions)
77
+
78
+ graph_extractor = CoOccurrencesGraphExtractor(
79
+ len(tokens), dynamic=True, dynamic_window=dynamic_window
80
+ )
81
+ out = graph_extractor(set(characters), [tokens])
82
+
83
+ assert len(out["character_network"]) > 0
84
+
85
+
86
+ @given(lists(sampled_from(string.ascii_uppercase)))
87
+ def test_polarity_extraction(tokens: List[str]):
88
+ graph_extractor = CoOccurrencesGraphExtractor(10)
89
+
90
+ bio_tags = ["B-PER"] * len(tokens)
91
+
92
+ mentions = ner_entities(tokens, bio_tags)
93
+ characters = _characters_from_mentions(mentions)
94
+
95
+ out = graph_extractor(
96
+ set(characters),
97
+ sentences=[tokens],
98
+ sentences_polarities=[1.0],
99
+ )
100
+
101
+ for character1, character2 in itertools.combinations(characters, 2):
102
+ if out["character_network"].has_edge(character1, character2):
103
+ assert "polarity" in out["character_network"].edges[character1, character2]
104
+
105
+
106
+ @given(lists(sampled_from(string.ascii_uppercase), min_size=1))
107
+ def test_sent_co_occurence_dist(sent1: List[str]):
108
+ # sent2 is guaranteed to be different from sent1, so that we
109
+ # have 2 different characters
110
+ sent2 = [chr(ord(token) + 1) for token in sent1]
111
+
112
+ graph_extractor = CoOccurrencesGraphExtractor((1, "sentences"))
113
+
114
+ sentences = [sent1, sent2]
115
+ tokens = sent1 + sent2
116
+ tags = ["B-PER"] * len(tokens)
117
+ characters = _characters_from_mentions(ner_entities(tokens, tags))
118
+
119
+ out = graph_extractor(set(characters), sentences)
120
+
121
+ assert len(out["character_network"]) > 0
@@ -0,0 +1,14 @@
1
+ import networkx as nx
2
+ from renard.graph_utils import cumulative_graph
3
+
4
+
5
+ def test_cumulative_graph():
6
+ gs = [
7
+ nx.Graph([(0, 1, {"weight": 1})]),
8
+ nx.Graph([(0, 1, {"weight": 1}), (0, 2, {"weight": 1})]),
9
+ ]
10
+
11
+ assert nx.is_isomorphic(
12
+ cumulative_graph(gs)[-1],
13
+ nx.Graph([(0, 1, {"weight": 2}), (0, 2, {"weight": 1})]),
14
+ )
@@ -0,0 +1,79 @@
1
+ from typing import List, Type
2
+ import string, os
3
+ import pytest
4
+ from hypothesis import given
5
+ from hypothesis.control import assume
6
+ from hypothesis.strategies import lists, sampled_from
7
+ from transformers import BertTokenizerFast
8
+ from renard.ner_utils import NERDataset
9
+ from renard.pipeline.ner import ner_entities, score_ner
10
+ from renard.pipeline.ner.retrieval import (
11
+ NERBM25ContextRetriever,
12
+ NERContextRetriever,
13
+ NEREnsembleContextRetriever,
14
+ NERNeighborsContextRetriever,
15
+ NERSamenounContextRetriever,
16
+ NERNeuralContextRetriever,
17
+ )
18
+
19
+
20
+ @pytest.mark.skipif(
21
+ os.getenv("RENARD_TEST_SEQEVAL_OPTDEP") != "1",
22
+ reason="not testing seqeval based functions",
23
+ )
24
+ @given(lists(sampled_from(("B-PER", "I-PER", "O")), min_size=1))
25
+ def test_score_same_tags(tags: List[str]):
26
+ assume("B-PER" in tags)
27
+ assert (1.0, 1.0, 1.0) == score_ner(tags, tags)
28
+
29
+
30
+ @given(lists(sampled_from(string.ascii_uppercase)))
31
+ def test_has_correct_number_of_entities(tokens: List[str]):
32
+ bio_tags = ["B-PER" for _ in tokens]
33
+ entities = ner_entities(tokens, bio_tags)
34
+ assert len(entities) == len(tokens)
35
+
36
+
37
+ @pytest.mark.skipif(os.getenv("RENARD_TEST_ALL") != "1", reason="performance")
38
+ @pytest.mark.parametrize(
39
+ "retriever_class", [NERSamenounContextRetriever, NERBM25ContextRetriever]
40
+ )
41
+ def test_retrieves_context(retriever_class: Type[NERContextRetriever]):
42
+ context_retriever = retriever_class(1)
43
+ tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")
44
+ sentences = [
45
+ "this is some test sentence .".split(" "),
46
+ "this is another test sentence .".split(" "),
47
+ ]
48
+ dataset = NERDataset(sentences, tokenizer)
49
+ ctx_dataset = context_retriever(dataset)
50
+ assert ctx_dataset.elements[0] == sentences[0] + sentences[1]
51
+ assert ctx_dataset.elements[1] == sentences[0] + sentences[1]
52
+ assert len(ctx_dataset.elements) == len(sentences)
53
+ assert len(ctx_dataset._context_mask) == len(sentences)
54
+
55
+
56
+ @pytest.mark.skipif(os.getenv("RENARD_TEST_ALL") != "1", reason="performance")
57
+ def test_neural_retrieves_context():
58
+ context_retriever = NERNeuralContextRetriever(
59
+ NEREnsembleContextRetriever(
60
+ [
61
+ NERSamenounContextRetriever(1),
62
+ NERBM25ContextRetriever(1),
63
+ NERNeighborsContextRetriever(2),
64
+ ],
65
+ k=4,
66
+ ),
67
+ k=1,
68
+ )
69
+ tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")
70
+ sentences = [
71
+ "this is some test sentence .".split(" "),
72
+ "this is another test sentence .".split(" "),
73
+ ]
74
+ dataset = NERDataset(sentences, tokenizer)
75
+ ctx_dataset = context_retriever(dataset)
76
+ assert ctx_dataset.elements[0] == sentences[0] + sentences[1]
77
+ assert ctx_dataset.elements[1] == sentences[0] + sentences[1]
78
+ assert len(ctx_dataset.elements) == len(sentences)
79
+ assert len(ctx_dataset._context_mask) == len(sentences)
@@ -0,0 +1,92 @@
1
+ from typing import Set
2
+ import os
3
+ import pytest
4
+ from renard.pipeline.core import Pipeline, PipelineStep
5
+ from renard.pipeline.preconfigured import bert_pipeline, nltk_pipeline
6
+
7
+
8
+ script_dir = os.path.abspath(os.path.dirname(__file__))
9
+
10
+
11
+ def test_pipeline_is_valid():
12
+ class TestPipelineStep1(PipelineStep):
13
+ def needs(self) -> Set[str]:
14
+ return set()
15
+
16
+ def production(self) -> Set[str]:
17
+ return {"info_1"}
18
+
19
+ class TestPipelineStep2(PipelineStep):
20
+ def needs(self) -> Set[str]:
21
+ return {"info_1"}
22
+
23
+ def production(self) -> Set[str]:
24
+ return set()
25
+
26
+ pipeline = Pipeline([TestPipelineStep1(), TestPipelineStep2()])
27
+
28
+ assert pipeline.check_valid()[0]
29
+
30
+
31
+ def test_pipeline_is_invalid():
32
+ class TestPipelineStep1(PipelineStep):
33
+ def needs(self) -> Set[str]:
34
+ return set()
35
+
36
+ def production(self) -> Set[str]:
37
+ return set()
38
+
39
+ class TestPipelineStep2(PipelineStep):
40
+ def needs(self) -> Set[str]:
41
+ return {"info_1"}
42
+
43
+ def production(self) -> Set[str]:
44
+ return set()
45
+
46
+ pipeline = Pipeline([TestPipelineStep1(), TestPipelineStep2()])
47
+
48
+ assert not pipeline.check_valid()[0]
49
+
50
+
51
+ @pytest.mark.skipif(os.getenv("RENARD_TEST_ALL") != "1", reason="performance")
52
+ def test_nltk_pipeline_runs():
53
+ with open(f"{script_dir}/pp_chapter1.txt") as f:
54
+ text = f.read()
55
+ pipeline = nltk_pipeline(
56
+ warn=False,
57
+ progress_report=None,
58
+ graph_extractor_kwargs={"co_occurrences_dist": (1, "sentences")},
59
+ )
60
+ pipeline(text)
61
+
62
+
63
+ @pytest.mark.skipif(os.getenv("RENARD_TEST_ALL") != "1", reason="performance")
64
+ def test_bert_pipeline_runs():
65
+ with open(f"{script_dir}/pp_chapter1.txt") as f:
66
+ text = f.read()
67
+ pipeline = bert_pipeline(
68
+ warn=False,
69
+ progress_report=None,
70
+ graph_extractor_kwargs={"co_occurrences_dist": (1, "sentences")},
71
+ )
72
+ pipeline(text)
73
+
74
+
75
+ @pytest.mark.skipif(os.getenv("RENARD_TEST_ALL") != "1", reason="performance")
76
+ def test_conversational_pipeline_runs():
77
+
78
+ with open(f"{script_dir}/pp_chapter1.txt") as f:
79
+ text = f.read()
80
+ # if the text is too long, speaker attribution takes a long time
81
+ text = text[:500]
82
+
83
+ pipeline = nltk_pipeline(
84
+ warn=False,
85
+ progress_report=None,
86
+ conversational=True,
87
+ graph_extractor_kwargs={
88
+ "graph_type": "conversation",
89
+ "conversation_dist": (3, "sentences"),
90
+ },
91
+ )
92
+ pipeline(text)