hkeyecite 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hkeyecite-0.1.0/LICENSE.txt +54 -0
- hkeyecite-0.1.0/PKG-INFO +216 -0
- hkeyecite-0.1.0/README.md +136 -0
- hkeyecite-0.1.0/hkeyecite.egg-info/PKG-INFO +216 -0
- hkeyecite-0.1.0/hkeyecite.egg-info/SOURCES.txt +17 -0
- hkeyecite-0.1.0/hkeyecite.egg-info/dependency_links.txt +1 -0
- hkeyecite-0.1.0/hkeyecite.egg-info/top_level.txt +1 -0
- hkeyecite-0.1.0/pyproject.toml +43 -0
- hkeyecite-0.1.0/setup.cfg +4 -0
- hkeyecite-0.1.0/src/__init__.py +32 -0
- hkeyecite-0.1.0/src/courts.py +271 -0
- hkeyecite-0.1.0/src/find.py +242 -0
- hkeyecite-0.1.0/src/models.py +148 -0
- hkeyecite-0.1.0/src/py.typed +0 -0
- hkeyecite-0.1.0/src/regexes.py +206 -0
- hkeyecite-0.1.0/src/reporters.py +133 -0
- hkeyecite-0.1.0/src/tokenizers.py +191 -0
- hkeyecite-0.1.0/tests/test_eval.py +237 -0
- hkeyecite-0.1.0/tests/test_find.py +243 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
BSD 2-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, Terracotta
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
|
8
|
+
|
|
9
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
10
|
+
list of conditions and the following disclaimer.
|
|
11
|
+
|
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
and/or other materials provided with the distribution.
|
|
15
|
+
|
|
16
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
17
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
19
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
20
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
21
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
22
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
23
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
24
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
25
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
Portions of this software are derived from eyecite
|
|
30
|
+
(https://github.com/freelawproject/eyecite)
|
|
31
|
+
|
|
32
|
+
Copyright (c) 2020, Free Law Project
|
|
33
|
+
All rights reserved.
|
|
34
|
+
|
|
35
|
+
Redistribution and use in source and binary forms, with or without
|
|
36
|
+
modification, are permitted provided that the following conditions are met:
|
|
37
|
+
|
|
38
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
39
|
+
list of conditions and the following disclaimer.
|
|
40
|
+
|
|
41
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
42
|
+
this list of conditions and the following disclaimer in the documentation
|
|
43
|
+
and/or other materials provided with the distribution.
|
|
44
|
+
|
|
45
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
46
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
47
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
48
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
49
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
50
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
51
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
52
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
53
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
54
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
hkeyecite-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hkeyecite
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A library for extracting and parsing legal citations from Hong Kong court judgments
|
|
5
|
+
Author: Terracotta
|
|
6
|
+
License: BSD 2-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025, Terracotta
|
|
9
|
+
All rights reserved.
|
|
10
|
+
|
|
11
|
+
Redistribution and use in source and binary forms, with or without
|
|
12
|
+
modification, are permitted provided that the following conditions are met:
|
|
13
|
+
|
|
14
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
15
|
+
list of conditions and the following disclaimer.
|
|
16
|
+
|
|
17
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
18
|
+
this list of conditions and the following disclaimer in the documentation
|
|
19
|
+
and/or other materials provided with the distribution.
|
|
20
|
+
|
|
21
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
22
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
23
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
24
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
25
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
26
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
27
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
28
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
29
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
Portions of this software are derived from eyecite
|
|
35
|
+
(https://github.com/freelawproject/eyecite)
|
|
36
|
+
|
|
37
|
+
Copyright (c) 2020, Free Law Project
|
|
38
|
+
All rights reserved.
|
|
39
|
+
|
|
40
|
+
Redistribution and use in source and binary forms, with or without
|
|
41
|
+
modification, are permitted provided that the following conditions are met:
|
|
42
|
+
|
|
43
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
44
|
+
list of conditions and the following disclaimer.
|
|
45
|
+
|
|
46
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
47
|
+
this list of conditions and the following disclaimer in the documentation
|
|
48
|
+
and/or other materials provided with the distribution.
|
|
49
|
+
|
|
50
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
51
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
52
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
53
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
54
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
55
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
56
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
57
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
58
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
59
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
60
|
+
|
|
61
|
+
Project-URL: Homepage, https://github.com/terracottalabs/hkeyecite
|
|
62
|
+
Project-URL: Repository, https://github.com/terracottalabs/hkeyecite
|
|
63
|
+
Project-URL: Issues, https://github.com/terracottalabs/hkeyecite/issues
|
|
64
|
+
Keywords: legal,citations,hong kong,law,nlp,courts
|
|
65
|
+
Classifier: Development Status :: 3 - Alpha
|
|
66
|
+
Classifier: Intended Audience :: Developers
|
|
67
|
+
Classifier: Intended Audience :: Legal Industry
|
|
68
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
69
|
+
Classifier: Programming Language :: Python :: 3
|
|
70
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
71
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
72
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
73
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
74
|
+
Classifier: Topic :: Text Processing
|
|
75
|
+
Classifier: Typing :: Typed
|
|
76
|
+
Requires-Python: >=3.10
|
|
77
|
+
Description-Content-Type: text/markdown
|
|
78
|
+
License-File: LICENSE.txt
|
|
79
|
+
Dynamic: license-file
|
|
80
|
+
|
|
81
|
+
# hkeyecite
|
|
82
|
+
|
|
83
|
+
A Python library that automatically finds and extracts legal citations from Hong Kong court judgments.
|
|
84
|
+
|
|
85
|
+
Give it any block of text containing Hong Kong legal references, and it will identify and parse:
|
|
86
|
+
|
|
87
|
+
- **Neutral citations** -- e.g. `[2024] HKCFA 1`
|
|
88
|
+
- **Law report citations** -- e.g. `(2019) 22 HKCFAR 446`
|
|
89
|
+
- **Action numbers** -- e.g. `HCAL 1756/2020`
|
|
90
|
+
|
|
91
|
+
It also extracts case names (e.g. "HKSAR v Harjani") and pinpoint references (e.g. "at [45]") when present.
|
|
92
|
+
|
|
93
|
+

|
|
94
|
+
|
|
95
|
+
## Installation
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pip install hkeyecite
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Requires Python 3.10 or later.
|
|
102
|
+
|
|
103
|
+
## What it recognises
|
|
104
|
+
|
|
105
|
+
### Neutral citations
|
|
106
|
+
|
|
107
|
+
The format used by the Judiciary since 2018: `[Year] Court Number`
|
|
108
|
+
|
|
109
|
+
| Code | Court |
|
|
110
|
+
|---|---|
|
|
111
|
+
| `HKCFA` | Court of Final Appeal |
|
|
112
|
+
| `HKCA` | Court of Appeal |
|
|
113
|
+
| `HKCFI` | Court of First Instance |
|
|
114
|
+
| `HKDC` | District Court |
|
|
115
|
+
| `HKFC` / `HKFamC` | Family Court |
|
|
116
|
+
| `HKLT` / `HKLdT` | Lands Tribunal |
|
|
117
|
+
| `HKCT` | Competition Tribunal |
|
|
118
|
+
| `HKLBT` / `HKLaT` | Labour Tribunal |
|
|
119
|
+
| `HKSCT` | Small Claims Tribunal |
|
|
120
|
+
| `HKOAT` | Obscene Articles Tribunal |
|
|
121
|
+
| `HKCC` | Coroner's Court |
|
|
122
|
+
| `HKMC` / `HKMagC` | Magistrates' Courts |
|
|
123
|
+
| `CFA` / `CA` / `CFI` | Older alternate codes |
|
|
124
|
+
|
|
125
|
+
### Law report citations
|
|
126
|
+
|
|
127
|
+
References to published law report series: `(Year) Volume Reporter Page`
|
|
128
|
+
|
|
129
|
+
| Code | Report Series |
|
|
130
|
+
|---|---|
|
|
131
|
+
| `HKCFAR` | Hong Kong Court of Final Appeal Reports |
|
|
132
|
+
| `HKLRD` | Hong Kong Law Reports & Digest |
|
|
133
|
+
| `HKC` | Hong Kong Cases |
|
|
134
|
+
| `HKPLR` | Hong Kong Public Law Reports |
|
|
135
|
+
| `HKLR` | Hong Kong Law Reports (historical) |
|
|
136
|
+
| `HKCLR` | Hong Kong Criminal Law Reports |
|
|
137
|
+
| `HKCLRT` | Hong Kong Chinese Law Reports & Translations |
|
|
138
|
+
|
|
139
|
+
Dotted variations (e.g. `H.K.L.R.D.`, `H.K.C.F.A.R.`) are automatically normalised.
|
|
140
|
+
|
|
141
|
+
### Action numbers
|
|
142
|
+
|
|
143
|
+
Case filing references: `Prefix Number/Year`
|
|
144
|
+
|
|
145
|
+
Common prefixes include FACV, FACC (Court of Final Appeal), CACV, CACC (Court of Appeal), HCA, HCAL, HCCC (Court of First Instance), DCCJ, DCCC (District Court), and many more.
|
|
146
|
+
|
|
147
|
+
### Metadata
|
|
148
|
+
|
|
149
|
+
When a citation is preceded by a case name like `HKSAR v Harjani` or `Re Something`, the case name is automatically extracted. Pinpoint references that follow a citation (`at [45]`, `at para 10`, `at p. 5`) are also captured.
|
|
150
|
+
|
|
151
|
+
## API
|
|
152
|
+
|
|
153
|
+
### `get_citations(text)`
|
|
154
|
+
|
|
155
|
+
The main function. Returns a list of citations found in the text, sorted by position.
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from hkeyecite import get_citations
|
|
159
|
+
|
|
160
|
+
citations = get_citations(text)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Each citation has:
|
|
164
|
+
- `.matched_text` -- the original text that was matched
|
|
165
|
+
- `.normalized()` -- a standardised form of the citation
|
|
166
|
+
- `.case_name` -- the case name, if one appears before the citation
|
|
167
|
+
- `.pin_cite` -- the pinpoint reference (e.g. paragraph number), if one follows
|
|
168
|
+
- `.start`, `.end` -- character positions in the source text
|
|
169
|
+
|
|
170
|
+
Pass `include_action_numbers=False` to skip action number extraction.
|
|
171
|
+
|
|
172
|
+
### Convenience functions
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
from hkeyecite.find import extract_neutral_citation, extract_reported_citations, extract_action_numbers
|
|
176
|
+
|
|
177
|
+
# Extract only one type
|
|
178
|
+
neutral = extract_neutral_citation(text)
|
|
179
|
+
reported = extract_reported_citations(text)
|
|
180
|
+
actions = extract_action_numbers(text)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Court and reporter lookup
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
from hkeyecite.courts import get_court_by_code, get_court_by_case_prefix
|
|
187
|
+
from hkeyecite.reporters import get_reporter
|
|
188
|
+
|
|
189
|
+
court = get_court_by_code("HKCFA")
|
|
190
|
+
court.name # "Court of Final Appeal"
|
|
191
|
+
court.name_zh # "終審法院"
|
|
192
|
+
|
|
193
|
+
court = get_court_by_case_prefix("FACV")
|
|
194
|
+
court.code # "HKCFA"
|
|
195
|
+
|
|
196
|
+
reporter = get_reporter("HKCFAR")
|
|
197
|
+
reporter.name # "Hong Kong Court of Final Appeal Reports"
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Limitations
|
|
201
|
+
|
|
202
|
+
- Only Hong Kong citations are supported -- UK, Australian, and other jurisdictions are not covered
|
|
203
|
+
- Case names in Chinese characters are not extracted
|
|
204
|
+
- Some older or non-standard citation formats may not be recognised
|
|
205
|
+
|
|
206
|
+
## Contributing
|
|
207
|
+
|
|
208
|
+
Pull requests are welcome. If you find a citation format that isn't recognised or have ideas for improvement, feel free to open an issue or PR.
|
|
209
|
+
|
|
210
|
+
## Acknowledgments
|
|
211
|
+
|
|
212
|
+
This project is derived from [eyecite](https://github.com/freelawproject/eyecite), a citation extraction library for US legal citations by the Free Law Project. `hkeyecite` adapts eyecite's approach for the Hong Kong jurisdiction, covering HK-specific courts, law reports, action number formats, and neutral citation conventions.
|
|
213
|
+
|
|
214
|
+
## License
|
|
215
|
+
|
|
216
|
+
BSD 2-Clause License -- see [LICENSE.txt](LICENSE.txt) for details.
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# hkeyecite
|
|
2
|
+
|
|
3
|
+
A Python library that automatically finds and extracts legal citations from Hong Kong court judgments.
|
|
4
|
+
|
|
5
|
+
Give it any block of text containing Hong Kong legal references, and it will identify and parse:
|
|
6
|
+
|
|
7
|
+
- **Neutral citations** -- e.g. `[2024] HKCFA 1`
|
|
8
|
+
- **Law report citations** -- e.g. `(2019) 22 HKCFAR 446`
|
|
9
|
+
- **Action numbers** -- e.g. `HCAL 1756/2020`
|
|
10
|
+
|
|
11
|
+
It also extracts case names (e.g. "HKSAR v Harjani") and pinpoint references (e.g. "at [45]") when present.
|
|
12
|
+
|
|
13
|
+

|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install hkeyecite
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Requires Python 3.10 or later.
|
|
22
|
+
|
|
23
|
+
## What it recognises
|
|
24
|
+
|
|
25
|
+
### Neutral citations
|
|
26
|
+
|
|
27
|
+
The format used by the Judiciary since 2018: `[Year] Court Number`
|
|
28
|
+
|
|
29
|
+
| Code | Court |
|
|
30
|
+
|---|---|
|
|
31
|
+
| `HKCFA` | Court of Final Appeal |
|
|
32
|
+
| `HKCA` | Court of Appeal |
|
|
33
|
+
| `HKCFI` | Court of First Instance |
|
|
34
|
+
| `HKDC` | District Court |
|
|
35
|
+
| `HKFC` / `HKFamC` | Family Court |
|
|
36
|
+
| `HKLT` / `HKLdT` | Lands Tribunal |
|
|
37
|
+
| `HKCT` | Competition Tribunal |
|
|
38
|
+
| `HKLBT` / `HKLaT` | Labour Tribunal |
|
|
39
|
+
| `HKSCT` | Small Claims Tribunal |
|
|
40
|
+
| `HKOAT` | Obscene Articles Tribunal |
|
|
41
|
+
| `HKCC` | Coroner's Court |
|
|
42
|
+
| `HKMC` / `HKMagC` | Magistrates' Courts |
|
|
43
|
+
| `CFA` / `CA` / `CFI` | Older alternate codes |
|
|
44
|
+
|
|
45
|
+
### Law report citations
|
|
46
|
+
|
|
47
|
+
References to published law report series: `(Year) Volume Reporter Page`
|
|
48
|
+
|
|
49
|
+
| Code | Report Series |
|
|
50
|
+
|---|---|
|
|
51
|
+
| `HKCFAR` | Hong Kong Court of Final Appeal Reports |
|
|
52
|
+
| `HKLRD` | Hong Kong Law Reports & Digest |
|
|
53
|
+
| `HKC` | Hong Kong Cases |
|
|
54
|
+
| `HKPLR` | Hong Kong Public Law Reports |
|
|
55
|
+
| `HKLR` | Hong Kong Law Reports (historical) |
|
|
56
|
+
| `HKCLR` | Hong Kong Criminal Law Reports |
|
|
57
|
+
| `HKCLRT` | Hong Kong Chinese Law Reports & Translations |
|
|
58
|
+
|
|
59
|
+
Dotted variations (e.g. `H.K.L.R.D.`, `H.K.C.F.A.R.`) are automatically normalised.
|
|
60
|
+
|
|
61
|
+
### Action numbers
|
|
62
|
+
|
|
63
|
+
Case filing references: `Prefix Number/Year`
|
|
64
|
+
|
|
65
|
+
Common prefixes include FACV, FACC (Court of Final Appeal), CACV, CACC (Court of Appeal), HCA, HCAL, HCCC (Court of First Instance), DCCJ, DCCC (District Court), and many more.
|
|
66
|
+
|
|
67
|
+
### Metadata
|
|
68
|
+
|
|
69
|
+
When a citation is preceded by a case name like `HKSAR v Harjani` or `Re Something`, the case name is automatically extracted. Pinpoint references that follow a citation (`at [45]`, `at para 10`, `at p. 5`) are also captured.
|
|
70
|
+
|
|
71
|
+
## API
|
|
72
|
+
|
|
73
|
+
### `get_citations(text)`
|
|
74
|
+
|
|
75
|
+
The main function. Returns a list of citations found in the text, sorted by position.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from hkeyecite import get_citations
|
|
79
|
+
|
|
80
|
+
citations = get_citations(text)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Each citation has:
|
|
84
|
+
- `.matched_text` -- the original text that was matched
|
|
85
|
+
- `.normalized()` -- a standardised form of the citation
|
|
86
|
+
- `.case_name` -- the case name, if one appears before the citation
|
|
87
|
+
- `.pin_cite` -- the pinpoint reference (e.g. paragraph number), if one follows
|
|
88
|
+
- `.start`, `.end` -- character positions in the source text
|
|
89
|
+
|
|
90
|
+
Pass `include_action_numbers=False` to skip action number extraction.
|
|
91
|
+
|
|
92
|
+
### Convenience functions
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from hkeyecite.find import extract_neutral_citation, extract_reported_citations, extract_action_numbers
|
|
96
|
+
|
|
97
|
+
# Extract only one type
|
|
98
|
+
neutral = extract_neutral_citation(text)
|
|
99
|
+
reported = extract_reported_citations(text)
|
|
100
|
+
actions = extract_action_numbers(text)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Court and reporter lookup
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from hkeyecite.courts import get_court_by_code, get_court_by_case_prefix
|
|
107
|
+
from hkeyecite.reporters import get_reporter
|
|
108
|
+
|
|
109
|
+
court = get_court_by_code("HKCFA")
|
|
110
|
+
court.name # "Court of Final Appeal"
|
|
111
|
+
court.name_zh # "終審法院"
|
|
112
|
+
|
|
113
|
+
court = get_court_by_case_prefix("FACV")
|
|
114
|
+
court.code # "HKCFA"
|
|
115
|
+
|
|
116
|
+
reporter = get_reporter("HKCFAR")
|
|
117
|
+
reporter.name # "Hong Kong Court of Final Appeal Reports"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Limitations
|
|
121
|
+
|
|
122
|
+
- Only Hong Kong citations are supported -- UK, Australian, and other jurisdictions are not covered
|
|
123
|
+
- Case names in Chinese characters are not extracted
|
|
124
|
+
- Some older or non-standard citation formats may not be recognised
|
|
125
|
+
|
|
126
|
+
## Contributing
|
|
127
|
+
|
|
128
|
+
Pull requests are welcome. If you find a citation format that isn't recognised or have ideas for improvement, feel free to open an issue or PR.
|
|
129
|
+
|
|
130
|
+
## Acknowledgments
|
|
131
|
+
|
|
132
|
+
This project is derived from [eyecite](https://github.com/freelawproject/eyecite), a citation extraction library for US legal citations by the Free Law Project. `hkeyecite` adapts eyecite's approach for the Hong Kong jurisdiction, covering HK-specific courts, law reports, action number formats, and neutral citation conventions.
|
|
133
|
+
|
|
134
|
+
## License
|
|
135
|
+
|
|
136
|
+
BSD 2-Clause License -- see [LICENSE.txt](LICENSE.txt) for details.
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hkeyecite
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A library for extracting and parsing legal citations from Hong Kong court judgments
|
|
5
|
+
Author: Terracotta
|
|
6
|
+
License: BSD 2-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025, Terracotta
|
|
9
|
+
All rights reserved.
|
|
10
|
+
|
|
11
|
+
Redistribution and use in source and binary forms, with or without
|
|
12
|
+
modification, are permitted provided that the following conditions are met:
|
|
13
|
+
|
|
14
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
15
|
+
list of conditions and the following disclaimer.
|
|
16
|
+
|
|
17
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
18
|
+
this list of conditions and the following disclaimer in the documentation
|
|
19
|
+
and/or other materials provided with the distribution.
|
|
20
|
+
|
|
21
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
22
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
23
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
24
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
25
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
26
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
27
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
28
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
29
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
Portions of this software are derived from eyecite
|
|
35
|
+
(https://github.com/freelawproject/eyecite)
|
|
36
|
+
|
|
37
|
+
Copyright (c) 2020, Free Law Project
|
|
38
|
+
All rights reserved.
|
|
39
|
+
|
|
40
|
+
Redistribution and use in source and binary forms, with or without
|
|
41
|
+
modification, are permitted provided that the following conditions are met:
|
|
42
|
+
|
|
43
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
44
|
+
list of conditions and the following disclaimer.
|
|
45
|
+
|
|
46
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
47
|
+
this list of conditions and the following disclaimer in the documentation
|
|
48
|
+
and/or other materials provided with the distribution.
|
|
49
|
+
|
|
50
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
51
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
52
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
53
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
54
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
55
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
56
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
57
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
58
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
59
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
60
|
+
|
|
61
|
+
Project-URL: Homepage, https://github.com/terracottalabs/hkeyecite
|
|
62
|
+
Project-URL: Repository, https://github.com/terracottalabs/hkeyecite
|
|
63
|
+
Project-URL: Issues, https://github.com/terracottalabs/hkeyecite/issues
|
|
64
|
+
Keywords: legal,citations,hong kong,law,nlp,courts
|
|
65
|
+
Classifier: Development Status :: 3 - Alpha
|
|
66
|
+
Classifier: Intended Audience :: Developers
|
|
67
|
+
Classifier: Intended Audience :: Legal Industry
|
|
68
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
69
|
+
Classifier: Programming Language :: Python :: 3
|
|
70
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
71
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
72
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
73
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
74
|
+
Classifier: Topic :: Text Processing
|
|
75
|
+
Classifier: Typing :: Typed
|
|
76
|
+
Requires-Python: >=3.10
|
|
77
|
+
Description-Content-Type: text/markdown
|
|
78
|
+
License-File: LICENSE.txt
|
|
79
|
+
Dynamic: license-file
|
|
80
|
+
|
|
81
|
+
# hkeyecite
|
|
82
|
+
|
|
83
|
+
A Python library that automatically finds and extracts legal citations from Hong Kong court judgments.
|
|
84
|
+
|
|
85
|
+
Give it any block of text containing Hong Kong legal references, and it will identify and parse:
|
|
86
|
+
|
|
87
|
+
- **Neutral citations** -- e.g. `[2024] HKCFA 1`
|
|
88
|
+
- **Law report citations** -- e.g. `(2019) 22 HKCFAR 446`
|
|
89
|
+
- **Action numbers** -- e.g. `HCAL 1756/2020`
|
|
90
|
+
|
|
91
|
+
It also extracts case names (e.g. "HKSAR v Harjani") and pinpoint references (e.g. "at [45]") when present.
|
|
92
|
+
|
|
93
|
+

|
|
94
|
+
|
|
95
|
+
## Installation
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pip install hkeyecite
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Requires Python 3.10 or later.
|
|
102
|
+
|
|
103
|
+
## What it recognises
|
|
104
|
+
|
|
105
|
+
### Neutral citations
|
|
106
|
+
|
|
107
|
+
The format used by the Judiciary since 2018: `[Year] Court Number`
|
|
108
|
+
|
|
109
|
+
| Code | Court |
|
|
110
|
+
|---|---|
|
|
111
|
+
| `HKCFA` | Court of Final Appeal |
|
|
112
|
+
| `HKCA` | Court of Appeal |
|
|
113
|
+
| `HKCFI` | Court of First Instance |
|
|
114
|
+
| `HKDC` | District Court |
|
|
115
|
+
| `HKFC` / `HKFamC` | Family Court |
|
|
116
|
+
| `HKLT` / `HKLdT` | Lands Tribunal |
|
|
117
|
+
| `HKCT` | Competition Tribunal |
|
|
118
|
+
| `HKLBT` / `HKLaT` | Labour Tribunal |
|
|
119
|
+
| `HKSCT` | Small Claims Tribunal |
|
|
120
|
+
| `HKOAT` | Obscene Articles Tribunal |
|
|
121
|
+
| `HKCC` | Coroner's Court |
|
|
122
|
+
| `HKMC` / `HKMagC` | Magistrates' Courts |
|
|
123
|
+
| `CFA` / `CA` / `CFI` | Older alternate codes |
|
|
124
|
+
|
|
125
|
+
### Law report citations
|
|
126
|
+
|
|
127
|
+
References to published law report series: `(Year) Volume Reporter Page`
|
|
128
|
+
|
|
129
|
+
| Code | Report Series |
|
|
130
|
+
|---|---|
|
|
131
|
+
| `HKCFAR` | Hong Kong Court of Final Appeal Reports |
|
|
132
|
+
| `HKLRD` | Hong Kong Law Reports & Digest |
|
|
133
|
+
| `HKC` | Hong Kong Cases |
|
|
134
|
+
| `HKPLR` | Hong Kong Public Law Reports |
|
|
135
|
+
| `HKLR` | Hong Kong Law Reports (historical) |
|
|
136
|
+
| `HKCLR` | Hong Kong Criminal Law Reports |
|
|
137
|
+
| `HKCLRT` | Hong Kong Chinese Law Reports & Translations |
|
|
138
|
+
|
|
139
|
+
Dotted variations (e.g. `H.K.L.R.D.`, `H.K.C.F.A.R.`) are automatically normalised.
|
|
140
|
+
|
|
141
|
+
### Action numbers
|
|
142
|
+
|
|
143
|
+
Case filing references: `Prefix Number/Year`
|
|
144
|
+
|
|
145
|
+
Common prefixes include FACV, FACC (Court of Final Appeal), CACV, CACC (Court of Appeal), HCA, HCAL, HCCC (Court of First Instance), DCCJ, DCCC (District Court), and many more.
|
|
146
|
+
|
|
147
|
+
### Metadata
|
|
148
|
+
|
|
149
|
+
When a citation is preceded by a case name like `HKSAR v Harjani` or `Re Something`, the case name is automatically extracted. Pinpoint references that follow a citation (`at [45]`, `at para 10`, `at p. 5`) are also captured.
|
|
150
|
+
|
|
151
|
+
## API
|
|
152
|
+
|
|
153
|
+
### `get_citations(text)`
|
|
154
|
+
|
|
155
|
+
The main function. Returns a list of citations found in the text, sorted by position.
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from hkeyecite import get_citations
|
|
159
|
+
|
|
160
|
+
citations = get_citations(text)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Each citation has:
|
|
164
|
+
- `.matched_text` -- the original text that was matched
|
|
165
|
+
- `.normalized()` -- a standardised form of the citation
|
|
166
|
+
- `.case_name` -- the case name, if one appears before the citation
|
|
167
|
+
- `.pin_cite` -- the pinpoint reference (e.g. paragraph number), if one follows
|
|
168
|
+
- `.start`, `.end` -- character positions in the source text
|
|
169
|
+
|
|
170
|
+
Pass `include_action_numbers=False` to skip action number extraction.
|
|
171
|
+
|
|
172
|
+
### Convenience functions
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
from hkeyecite.find import extract_neutral_citation, extract_reported_citations, extract_action_numbers
|
|
176
|
+
|
|
177
|
+
# Extract only one type
|
|
178
|
+
neutral = extract_neutral_citation(text)
|
|
179
|
+
reported = extract_reported_citations(text)
|
|
180
|
+
actions = extract_action_numbers(text)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Court and reporter lookup
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
from hkeyecite.courts import get_court_by_code, get_court_by_case_prefix
|
|
187
|
+
from hkeyecite.reporters import get_reporter
|
|
188
|
+
|
|
189
|
+
court = get_court_by_code("HKCFA")
|
|
190
|
+
court.name # "Court of Final Appeal"
|
|
191
|
+
court.name_zh # "終審法院"
|
|
192
|
+
|
|
193
|
+
court = get_court_by_case_prefix("FACV")
|
|
194
|
+
court.code # "HKCFA"
|
|
195
|
+
|
|
196
|
+
reporter = get_reporter("HKCFAR")
|
|
197
|
+
reporter.name # "Hong Kong Court of Final Appeal Reports"
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Limitations
|
|
201
|
+
|
|
202
|
+
- Only Hong Kong citations are supported -- UK, Australian, and other jurisdictions are not covered
|
|
203
|
+
- Case names in Chinese characters are not extracted
|
|
204
|
+
- Some older or non-standard citation formats may not be recognised
|
|
205
|
+
|
|
206
|
+
## Contributing
|
|
207
|
+
|
|
208
|
+
Pull requests are welcome. If you find a citation format that isn't recognised or have ideas for improvement, feel free to open an issue or PR.
|
|
209
|
+
|
|
210
|
+
## Acknowledgments
|
|
211
|
+
|
|
212
|
+
This project is derived from [eyecite](https://github.com/freelawproject/eyecite), a citation extraction library for US legal citations by the Free Law Project. `hkeyecite` adapts eyecite's approach for the Hong Kong jurisdiction, covering HK-specific courts, law reports, action number formats, and neutral citation conventions.
|
|
213
|
+
|
|
214
|
+
## License
|
|
215
|
+
|
|
216
|
+
BSD 2-Clause License -- see [LICENSE.txt](LICENSE.txt) for details.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
LICENSE.txt
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
hkeyecite.egg-info/PKG-INFO
|
|
5
|
+
hkeyecite.egg-info/SOURCES.txt
|
|
6
|
+
hkeyecite.egg-info/dependency_links.txt
|
|
7
|
+
hkeyecite.egg-info/top_level.txt
|
|
8
|
+
src/__init__.py
|
|
9
|
+
src/courts.py
|
|
10
|
+
src/find.py
|
|
11
|
+
src/models.py
|
|
12
|
+
src/py.typed
|
|
13
|
+
src/regexes.py
|
|
14
|
+
src/reporters.py
|
|
15
|
+
src/tokenizers.py
|
|
16
|
+
tests/test_eval.py
|
|
17
|
+
tests/test_find.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
hkeyecite
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "hkeyecite"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A library for extracting and parsing legal citations from Hong Kong court judgments"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {file = "LICENSE.txt"}
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Terracotta" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["legal", "citations", "hong kong", "law", "nlp", "courts"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Intended Audience :: Legal Industry",
|
|
20
|
+
"License :: OSI Approved :: BSD License",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: Text Processing",
|
|
27
|
+
"Typing :: Typed",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/terracottalabs/hkeyecite"
|
|
32
|
+
Repository = "https://github.com/terracottalabs/hkeyecite"
|
|
33
|
+
Issues = "https://github.com/terracottalabs/hkeyecite/issues"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools]
|
|
36
|
+
package-dir = {"hkeyecite" = "src"}
|
|
37
|
+
packages = ["hkeyecite"]
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.package-data]
|
|
40
|
+
hkeyecite = ["py.typed"]
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.exclude-package-data]
|
|
43
|
+
"*" = ["demo.gif", "demo.tape"]
|