bibla 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bibla/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ """bibla is a minimalistic linter (style checker) for biblatex.
2
+
3
+ bibla with support for libraries managed by JabRef.
4
+ bibla does not come with its own biblatex parser, but leverages the pybtex parser.
5
+ """
6
+ __version__ = "2.0.0"
bibla/__main__.py ADDED
@@ -0,0 +1,121 @@
1
+ """Main CLI script to launch the linter using Click."""
2
+ import os
3
+ import warnings
4
+
5
+ import click
6
+ from bibla import __version__
7
+ from bibla.lint import lint as bibl_lint
8
+ from bibla.config import load_config_file, set_config
9
+ from bibla.rule import load_rules
10
+ from bibla.text_utils import format_rules_markdown_tables
11
+
12
+
13
def _split_rule_list(value):
    """Split a comma separated CLI value into a list of rule patterns.

    Whitespace around each item is stripped so that ``"D01, D02"`` yields
    ``['D01', 'D02']`` instead of a pattern with a leading space that can
    never match a rule id.

    :param value: raw comma separated string from the command line
    :return: list of rule id patterns
    """
    return [item.strip() for item in value.split(',')]


# NOTE: no ``help=`` is passed to ``click.group()``, so the docstring of
# ``cli`` doubles as the text shown by ``bibla --help``; it is kept as is.
@click.group()
@click.option('-c', '--config', help='Custom configuration file path.',
              type=str)
@click.option('--select',
              help='Comma separated list of enabled rules, all other rules '
                   'will be disabled.',
              type=str)
@click.option('--ignore',
              help='Comma separated list of disabled rules, all other rules '
                   'will be enabled.',
              type=str)
@click.option('--indent-spaces',
              help='Number of trailing whitespaces for indented line, '
                   'used by T01.',
              type=int)
@click.option('--max-line-length',
              help='Max line length before wrap recommended, used by T03.',
              type=int)
def cli(config, select, ignore, indent_spaces, max_line_length):
    """`bibla` base command line script.

    Extra configuration options can be specified with command line arguments.
    Hierarchy of configuration options is as follows (higher supersedes lower):

    Configuration options specified as command line options (e.g. --ignore)
    Configuration file specified with --config
    `bibla.yml` configuration file in the current directory
    `.bibla.yml` configuration file in the current directory
    The default configuration file (in bibla/bibla.yml)


    :param see help strings above
    """
    # Only the first configuration file found is loaded: an explicit
    # --config wins over `bibla.yml`, which wins over `.bibla.yml`.
    if config is not None:
        load_config_file(config)
    elif os.path.isfile('bibla.yml'):
        load_config_file('bibla.yml')
    elif os.path.isfile('.bibla.yml'):
        load_config_file('.bibla.yml')

    # Command line options are applied last so they override file values.
    if select is not None:
        set_config('select', _split_rule_list(select))
    if ignore is not None:
        set_config('ignore', _split_rule_list(ignore))
    # set_config ignores None, so options that were not given on the command
    # line leave the configuration untouched.
    set_config('indent_spaces', indent_spaces)
    set_config('max_line_length', max_line_length)
59
+
60
+
61
@cli.command(help="Lint a biblatex bibliography file.")
@click.argument('bibliography', type=str, nargs=-1)
def lint(bibliography):
    """Run the linter on every bibliography file given on the command line.

    Invoked as `bibla lint`.

    :param bibliography: tuple of bibliography file paths (may be empty)
    """
    # Suppress Python warnings for the duration of the run so that only the
    # linter's own output is shown.
    warnings.filterwarnings("ignore")
    for bib_path in bibliography:
        bibl_lint(bib_path)
73
+
74
+
75
@cli.command(help="Show all available rules.")
@click.option('-m', 'markdown', help='Format rules as markdown table.',
              is_flag=True)
def list_all(markdown):
    """Print every loaded rule, whether enabled or not.

    Invoked as `bibla list-all`.

    :param markdown: when set, print the rules as markdown tables instead
        of one rule per line
    """
    every_rule = load_rules().all
    if not markdown:
        for item in every_rule:
            click.echo(item)
        return
    click.echo(format_rules_markdown_tables(every_rule))
91
+
92
+
93
@cli.command(help="Show all rules enabled by the configuration.")
@click.option('-m', 'markdown', help='Format rules as markdown table.',
              is_flag=True)
def list_enabled(markdown):
    """Print the rules that are enabled under the current configuration.

    Invoked as `bibla list-enabled`.

    :param markdown: when set, print the rules as markdown tables instead
        of one rule per line
    """
    active_rules = load_rules().enabled
    if not markdown:
        for item in active_rules:
            click.echo(item)
        return
    click.echo(format_rules_markdown_tables(active_rules))
109
+
110
+
111
@cli.command(help="Show the package version.")
def version():
    """Print the installed bibla version.

    Invoked as `bibla version`.
    """
    click.echo(f'bibla version: {__version__}')
118
+
119
+
120
+ if __name__ == '__main__':
121
+ cli(prog_name='bibla')
bibla/bibla.yml ADDED
@@ -0,0 +1,85 @@
1
+ ---
2
+ ## DEFAULT bibla CONFIG
3
+ select: [ ] # List of enabled rules, all other rules will be disabled
4
+ ignore: [ D00, T00, T02, T03, U01* ] # List of disabled rules, all other rules will be enabled - The disabled rules from bibl just seem to make less sense for Bibla. Personal choices may apply.
5
+ # Specify max 1 of the two options above. If none are specified, all rules will be enabled
6
+ indent_spaces: 2 # number of trailing whitespaces for indented line, used by T01
7
+ max_line_length: 120 # max line length before wrap optional, used by T03
8
+ abbreviation_dot: True # abbreviate middle names with dot (John F. Kennedy) as opposed to without (John F Kennedy), used by E02
9
+
10
+ # Specification from https://en.wikipedia.org/wiki/bibtex extended with fields used in JabRef.
11
+ # This specification is used to generate M01 and U01 rules
12
+ # These have been further adapted to work with BibLaTeX entry types.
13
+ type_spec:
14
+ incollection: # A part of a book having its own title.
15
+ required: [ title, booktitle, publisher, year ]
16
+ optional: [ volume, number, series, type, chapter, pages, address, edition, month, key, file ]
17
+ unpublished: # A document having an author and title, but not formally published.
18
+ required: [title]
19
+ optional: [month, year, key, file]
20
+
21
+ # For HOGENT students. - previously used ones have been disabled above.
22
+ article:
23
+ required: [title, journaltitle, date]
24
+ optional: [doi, volume, number, pages]
25
+ book:
26
+ required: [title, date, publisher]
27
+ optional: [isbn]
28
+ inbook:
29
+ required: [title, booktitle, date, publisher]
30
+ optional: [isbn ,doi, pages]
31
+ booklet:
32
+ required: [title, date, publisher]
33
+ optional: [isbn , doi, url, howpublished]
34
+ dataset:
35
+ required: [title, date, url, urldate]
36
+ optional: []
37
+ manual:
38
+ required: [ title, date]
39
+ optional: [organization, publisher, isbn, doi, url ]
40
+ software:
41
+ required: [ title, date]
42
+ optional: []
43
+ misc:
44
+ required: [ title, date]
45
+ optional: []
46
+ online:
47
+ required: [ title, date, url, urldate]
48
+ optional: []
49
+ electronic:
50
+ required: [ title, date, url, urldate]
51
+ optional: []
52
+ www:
53
+ required: [ title, date, url, urldate]
54
+ optional: []
55
+ inproceedings:
56
+ required: [title, booktitle, date]
57
+ optional: [ eventtitle, isbn, doi, url]
58
+ conference:
59
+ required: [title, booktitle, date]
60
+ optional: [ eventtitle, isbn, doi, url]
61
+ report:
62
+ required: [title, date, type, institution]
63
+ optional: [doi, url]
64
+ techreport:
65
+ required: [title, date, type, institution]
66
+ optional: [doi, url]
67
+ thesis:
68
+ required: [title, date, type, institution]
69
+ optional: [url]
70
+ mastersthesis:
71
+ required: [title, date, type, institution]
72
+ optional: [url]
73
+ phdthesis:
74
+ required: [title, date, type, institution]
75
+ optional: [url]
76
+
77
+ alias_entry_types:
78
+ misc: [software]
79
+ online: [electronic, www]
80
+ inproceedings: [conference]
81
+ report: [techreport]
82
+ thesis: [mastersthesis, phdthesis]
83
+
84
+ alternate_fields:
85
+ date: [year] # Month and day will not be used alone, so when we just check for the year, that'll be fine.
bibla/config.py ADDED
@@ -0,0 +1,64 @@
1
+ """Linter configuration logic."""
2
+ from typing import Dict, Any
3
+
4
+ import pkg_resources
5
+ import yaml
6
+
7
+ _config = dict()
8
+ _read_default = False
9
+
10
+ _DEFAULT_CONFIG_FILE = 'bibla.yml'
11
+
12
+
13
def get_config() -> Dict:
    """Return the active configuration dictionary.

    On the first call the packaged default configuration is loaded to fill
    in every key that has not been set yet.

    :return: the module-level configuration dictionary
    """
    if _read_default:
        return _config
    _load_default_config()
    return _config
18
+
19
+
20
def set_config(key: str, value: Any, default: bool = False):
    """Store a single configuration value and re-validate the configuration.

    A ``None`` value is ignored, so unset options never overwrite anything.

    :param key: configuration entry key
    :param value: configuration entry value
    :param default: if true, only store the value when the key has not been
        set yet, so an explicitly configured value is never replaced
    """
    if value is not None and default:
        _config.setdefault(key, value)
    elif value is not None:
        _config[key] = value
    _validate_and_clean_config(_config)
33
+
34
+
35
def load_config_file(file):
    """Read a YAML config file and use it as the configuration.

    Every top-level key of the file is stored with :func:`set_config`,
    overriding values that were set before. An empty file is accepted and
    leaves the configuration unchanged.

    :param file: .yaml config file path
    :raises ValueError: if the resulting configuration contains both
        selected and ignored rules
    """
    with open(file, encoding='utf-8') as config_file:
        # NOTE(review): yaml.load with FullLoader is kept as in the original;
        # consider yaml.safe_load if config files can come from untrusted
        # sources.
        config = yaml.load(config_file, Loader=yaml.FullLoader)
    # An empty YAML document is parsed as None; treat it as "no options".
    for k, v in (config or {}).items():
        set_config(k, v)
44
+
45
+
46
def _load_default_config():
    """Fill unset configuration keys from the packaged default config file.

    Values are stored with ``default=True`` so anything configured earlier
    (config file or command line) is kept. ``select`` and ``ignore`` are
    mutually exclusive: when one of them was already configured with a
    non-empty list, the packaged default of the other is replaced by an
    empty list. Without this, the non-empty default ``ignore`` list would
    make validation fail for every user-provided ``select``.
    """
    global _read_default
    # Set the flag first so the defaults are loaded at most once.
    _read_default = True
    default_path = pkg_resources.resource_filename(__name__,
                                                   _DEFAULT_CONFIG_FILE)
    with open(default_path, encoding='utf-8') as default_config_file:
        default_config = yaml.load(default_config_file, Loader=yaml.FullLoader)

    exclusive = {'select': 'ignore', 'ignore': 'select'}
    for k, v in default_config.items():
        counterpart = exclusive.get(k)
        if counterpart is not None and _config.get(counterpart):
            # Keep the key present (rules index it directly) but empty.
            v = []
        set_config(k, v, default=True)
55
+
56
+
57
def _validate_and_clean_config(config):
    """Check that the configuration is internally consistent.

    ``select`` and ``ignore`` are mutually exclusive: at most one of them
    may hold a non-empty list.

    :param config: configuration dictionary to validate
    :raises ValueError: if both ``select`` and ``ignore`` are non-empty
    """
    if config.get('select') and config.get('ignore'):
        raise ValueError(
            "Configuration cannot contain both selected and ignored rules. "
            "Use either 'select' or 'ignore' to choose the enabled rules."
        )
bibla/lint.py ADDED
@@ -0,0 +1,130 @@
1
+ """Main linter logic."""
2
+ import logging
3
+ import sys
4
+ from dataclasses import dataclass
5
+ from typing import List, Iterable
6
+ from pybtex.database import BibliographyDataError
7
+ import re
8
+
9
+ import pybtex
10
+ from pybtex.database import parse_file
11
+ from bibla.rule import load_rules, Rule, EntryRule, TextRule
12
+ from bibla.text_utils import find_entry_line_number, MONTH_NAMES
13
+
14
+ logger = logging.getLogger()
15
+ logger.setLevel(logging.WARNING)
16
+
17
+ handler = logging.StreamHandler(sys.stdout)
18
+ handler.setLevel(logging.WARNING)
19
+ logger.addHandler(handler)
20
+
21
+
22
@dataclass
class LintWarning:
    """A single linter rule violation at a location in a bibliography."""

    # path of the bibliography file containing the violation
    file: str
    # line number at which the violation was detected
    line: int
    # the rule that was violated
    rule: Rule

    def log(self):
        """Emit the violation as ``file:line rule`` through the logger."""
        logger.warning(f"{self.file}:{self.line} {self.rule!s}")
37
+
38
+
39
def lint(bibliography: str, verbose: bool = True) -> List[LintWarning]:
    """Execute the main linter program.

    The file is first parsed by the pybtex parser; when parsing fails a
    message is printed and no warnings are returned. Otherwise all enabled
    text rules are checked against each line of the file and all enabled
    entry rules against each parsed entry.

    :param bibliography: a .bib bibliography file path
    :param verbose: log linter warnings to stdout
    :return: a list of LintWarning objects representing the linter violations
        found while running, ordered by line number and then by rule id;
        an empty list if the file could not be parsed
    """
    try:
        bib_data = parse_file(bibliography, macros=MONTH_NAMES)
    except BibliographyDataError as e:
        # Extract the key from the error message.
        # NOTE(review): the misspelling "bibliograhpy" presumably mirrors the
        # message emitted by pybtex itself - verify before "fixing" it.
        match = re.search(r'repeated bibliograhpy entry: (.*)', str(e))
        if match:
            duplicate_key = match.group(1)
            print(f"{bibliography} D03: Duplicate entry with key "
                  f"'{duplicate_key}'")
        else:
            print(f"Warning: {e}")
        return []
    except pybtex.scanner.TokenRequired as e:
        print(f"E00: {e}")
        return []
    except Exception as e:
        # Catch-all for this file: report any other failure (unreadable
        # file, decoding problems, other parser errors) and carry on. The
        # original named an undefined ``Error`` here, which turned every
        # such failure into a NameError.
        print(f"Error: {e}")
        return []

    bib_data.file = bibliography
    # NOTE(review): pybtex presumably parses the file as UTF-8 by default;
    # the same encoding is used here so line numbers refer to the same text.
    with open(bibliography, 'r', encoding='utf-8') as bib_file:
        bib_text = bib_file.read()

    rules = load_rules()

    found = _apply_text_rules(bibliography, bib_text,
                              rules.enabled_text_rules)
    found += _apply_entry_rules(bibliography, bib_data, bib_text,
                                rules.enabled_entry_rules)

    # Primary order: line number; ties are broken by rule id.
    found.sort(key=lambda w: (w.line, w.rule.rule_id))
    if verbose:
        for warning in found:
            warning.log()
    return found
88
+
89
+
90
def _apply_text_rules(bibliography: str, bib_text: str,
                      text_rules: Iterable[TextRule]) -> List[LintWarning]:
    """Evaluate every text rule on every line of the bibliography file.

    Lines are obtained by splitting the text on ``'\\n'`` and are numbered
    starting at 1.

    :param bibliography: bibliography file path
    :param bib_text: bibliography file contents
    :param text_rules: text rules to evaluate on each line of the file
    :return: a list of LintWarning objects, one per (line, rule) pair for
        which the rule returned a falsy result
    """
    return [
        LintWarning(bibliography, number, text_rule)
        for number, content in enumerate(bib_text.split('\n'), start=1)
        for text_rule in text_rules
        if not text_rule(number, content, bib_text)
    ]
108
+
109
+
110
def _apply_entry_rules(bibliography: str,
                       bib_data: pybtex.database.BibliographyData,
                       bib_text: str, entry_rules: Iterable[EntryRule]) \
        -> List[LintWarning]:
    """Check all entry rules on every parsed bibliography entry.

    :param bibliography: bibliography file path
    :param bib_data: parsed bibliography data containing entries
    :param bib_text: bibliography file contents
    :param entry_rules: entry rules to be evaluated on each entry
    :return: a list of LintWarning objects representing found rule
        violations, located at the line number of the violating entry
    """
    found = []
    for key, entry in bib_data.entries.items():
        # The entry's line number does not depend on the rule, so it is
        # resolved at most once per entry, and only when a violation needs
        # it. Assumes find_entry_line_number is a pure lookup in bib_text.
        line_number = None
        for rule in entry_rules:
            if rule(key, entry, bib_data):
                continue
            if line_number is None:
                line_number, _ = find_entry_line_number(bib_text, key)
            found.append(LintWarning(bibliography, line_number, rule))
    return found
bibla/rule.py ADDED
@@ -0,0 +1,158 @@
1
+ """Structure to manage rules in a unified way."""
2
+ import fnmatch
3
+ import os
4
+ from typing import Callable, Dict, List
5
+ from pybtex.database import Entry
6
+ from bibla.config import get_config
7
+
8
+
9
class Rule:
    """Base class for a linter rule.

    A rule couples an identifier and a human readable description with the
    callable that performs the actual check.

    :param rule_id: identifying code for this rule, starting with a letter
        indicating the rule type, followed by a number or a string
        specification.
    :param description: a full sentence description of the rule
    """

    def __init__(self, rule_id: str, description: str, rule: Callable):
        """Initialise the rule.

        :param rule_id: identifying code for this rule, starting with a
            letter indicating the rule type, followed by a number or a
            string specification.
        :param description: a full sentence description of the rule
        :param rule: callable returning a boolean, invoked when this rule
            is checked
        """
        self._rule = rule
        self.rule_id = rule_id
        self.description = description

    def __str__(self):
        """Format the rule as ``<rule_id>: <description>``."""
        return f"{self.rule_id}: {self.description}"

    def __call__(self, *args, **kwargs):
        """Evaluate the rule by forwarding all arguments to its callable.

        :param args, kwargs: arguments passed through to the rule callable
        :return: True if the bibliography is consistent with the rule, False
            if the rule is violated.
        """
        check = self._rule
        return check(*args, **kwargs)

    @property
    def enabled(self):
        """Tell whether this rule should be checked in the current run.

        A non-empty ``select`` list takes precedence: the rule is enabled
        only if its id matches one of the patterns. Otherwise a non-empty
        ``ignore`` list disables the rule if its id matches one of the
        patterns. Patterns are matched with :func:`fnmatch.fnmatch`.

        :return: True if the rule should be checked based on the
            configuration used for running the linter, False otherwise
        """
        selected = get_config()['select']
        if selected:
            return any(fnmatch.fnmatch(self.rule_id, pattern)
                       for pattern in selected)
        ignored = get_config()['ignore']
        if ignored:
            return not any(fnmatch.fnmatch(self.rule_id, pattern)
                           for pattern in ignored)
        return True
63
+
64
+
65
class EntryRule(Rule):
    """Rule that is evaluated on a parsed bibliography entry."""

    def __init__(self, rule_id, description,
                 rule: Callable[[str, Entry, Dict[str, Entry]], bool]):
        """Initialise the entry rule.

        The rule callable is invoked with three arguments:

        - key: the key of the current bibliography entry
        - entry: the current bibliography entry
        - database: all bibliography entries

        :param rule_id: identifying code for this rule
        :param description: a full sentence description of the rule
        :param rule: callable performing the check on one entry
        """
        super().__init__(rule_id=rule_id, description=description, rule=rule)
77
+
78
+
79
class TextRule(Rule):
    """Rule that is evaluated on a single text line of the bibliography."""

    def __init__(self, rule_id, description,
                 rule: Callable[[int, str, str], bool]):
        """Initialise the text rule.

        The rule callable is invoked with three arguments:

        - line_number: the number of the current line in the bibliography
        - line: the content of the current line in the bibliography
        - text: the entire bibliography

        :param rule_id: identifying code for this rule
        :param description: a full sentence description of the rule
        :param rule: callable performing the check on one line
        """
        super().__init__(rule_id=rule_id, description=description, rule=rule)
91
+
92
+
93
class RuleStore:
    """Collection of all registered rules, kept ordered by rule id."""

    def __init__(self):
        """Start with an empty collection."""
        self._rules = []

    def register(self, rule: Rule):
        """Insert a rule at the position that keeps the store ordered.

        The rule is placed before the first stored rule whose id is not
        smaller than its own id.

        :param rule: a Rule object to register
        """
        insert_at = len(self._rules)
        for index, existing in enumerate(self._rules):
            if not (existing.rule_id < rule.rule_id):
                insert_at = index
                break
        self._rules.insert(insert_at, rule)

    @property
    def all(self) -> List[Rule]:
        """Return every registered rule (the internal list itself)."""
        return self._rules

    @property
    def enabled(self) -> List[Rule]:
        """Return the registered rules enabled by the configuration."""
        return list(filter(lambda rule: rule.enabled, self._rules))

    @property
    def enabled_entry_rules(self) -> List[EntryRule]:
        """Return the enabled rules that are entry rules."""
        return list(
            filter(lambda rule: isinstance(rule, EntryRule), self.enabled))

    @property
    def enabled_text_rules(self) -> List[TextRule]:
        """Return the enabled rules that are text rules."""
        return list(
            filter(lambda rule: isinstance(rule, TextRule), self.enabled))
130
+
131
+
132
+ _ALL_RULES: RuleStore = RuleStore()
133
+
134
+
135
def register_entry_rule(rule_id: str, description: str) -> Callable:
    """Create a decorator that registers a function as an entry rule.

    :param rule_id: identifying code for the rule
    :param description: a full sentence description of the rule
    :return: a decorator that wraps the function in an EntryRule, adds it
        to the global rule store and returns the function unchanged
    """
    def decorator(f: Callable[[str, Entry, Dict[str, Entry]], bool]):
        rule = EntryRule(rule_id, description, f)
        _ALL_RULES.register(rule)
        # Return the function so the decorated name stays bound to it
        # instead of being replaced by None.
        return f
    return decorator
141
+
142
+
143
def register_text_rule(rule_id: str, description: str) -> Callable:
    """Create a decorator that registers a function as a text rule.

    :param rule_id: identifying code for the rule
    :param description: a full sentence description of the rule
    :return: a decorator that wraps the function in a TextRule, adds it to
        the global rule store and returns the function unchanged
    """
    def decorator(f: Callable[[int, str, str], bool]):
        rule = TextRule(rule_id, description, f)
        _ALL_RULES.register(rule)
        # Return the function so the decorated name stays bound to it
        # instead of being replaced by None.
        return f
    return decorator
149
+
150
+
151
def load_rules() -> RuleStore:
    """Import all modules in the `bibla/rules` package.

    Importing a rule module runs its registration decorators, which add
    the rules to the global rule store.

    :return: the global rule store
    """
    # Local import keeps this function self-contained.
    import importlib

    rules_dir = os.path.join(os.path.dirname(__file__), 'rules')
    # Sorted so modules are imported in a deterministic order on every
    # platform; os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(rules_dir)):
        if filename == '__init__.py' or not filename.endswith('.py'):
            continue
        importlib.import_module('bibla.rules.' + filename[:-3])
    return _ALL_RULES
@@ -0,0 +1,6 @@
1
+ """Package that contains modules with linter rules.
2
+
3
+ Modules in this package will automatically be imported when loading rules.
4
+ If these modules contain linter rule functions registered with
5
+ `@register_entry_rule`, they will be added to the linter.
6
+ """
@@ -0,0 +1,59 @@
1
+ """Linter rules checking the consistency of the entire BibLaTeX file."""
2
+ import re
3
+ from fuzzywuzzy import fuzz
4
+ from unidecode import unidecode
5
+ from bibla.rule import register_entry_rule, register_text_rule
6
+
7
+
8
@register_entry_rule('D00', 'Entry not in alphabetical order by key')
def keys_alphabetical(key, entry, database):
    """Raise a linter warning when entries are not in alphabetical order by key.

    The comparison is case-insensitive and looks at the entry that follows
    the current one in the database.

    :param key: The key of the current bibliography entry
    :param entry: The current bibliography entry
    :param database: All bibliography entries
    :return: True if the current entry is the last one, or if its key is
        alphabetically smaller than or equal to the key of the next entry,
        False otherwise.
    """
    ordered_keys = list(database.entries.keys())
    following = ordered_keys.index(key) + 1
    if following == len(ordered_keys):
        # The last entry has no successor to compare with.
        return True
    return entry.key.lower() <= ordered_keys[following].lower()
24
+
25
+
26
@register_text_rule('D01', 'Preamble should begin at first line of document')
def line_length(line_number, line, text):
    """Raise a linter warning when the preamble is not on the first line.

    :param line_number: The number of the current line in the bibliography
    :param line: The content of the current line in the bibliography
    :param text: The entire bibliography
    :return: True if the line does not start a preamble or the preamble
        starts at line 1 of the biblatex file, False otherwise.
    """
    regex = re.compile(r'^\s*@preamble')
    # The linter numbers lines starting at 1, so the first line is 1. The
    # original compared against 0, which flagged a preamble on any line.
    return not regex.match(line.lower()) or line_number == 1
38
+
39
+
40
@register_entry_rule('D02', 'Possible duplicate entry based on similar titles')
def title_duplicate(key, entry, database):
    """Raise a linter warning when entries with similar titles are present.

    Titles are transliterated with ``unidecode`` and lower-cased before
    they are compared.

    :param key: The key of the current bibliography entry
    :param entry: The current bibliography entry
    :param database: All bibliography entries
    :return: False if the fuzzy match partial ratio of the title of the
        current entry with the title of any other entry exceeds 90%, True
        otherwise (including when the current entry has no title).
    """
    if 'title' not in entry.fields:
        return True
    # Normalise the current title once instead of once per compared entry.
    own_title = unidecode(entry.fields['title']).lower()
    for other_key, other in database.entries.items():
        # Skip the current entry itself by identity or key.
        # NOTE(review): the original used ``!=`` on the entries; pybtex
        # Entry equality presumably compares content, which would hide exact
        # duplicates stored under a different key - confirm.
        if other is entry or other_key == key:
            continue
        if 'title' not in other.fields:
            continue
        other_title = unidecode(other.fields['title']).lower()
        if fuzz.partial_ratio(own_title, other_title) > 90:
            return False
    return True
59
+ return True