hkeyecite 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2025, Terracotta
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+ ---
28
+
29
+ Portions of this software are derived from eyecite
30
+ (https://github.com/freelawproject/eyecite)
31
+
32
+ Copyright (c) 2020, Free Law Project
33
+ All rights reserved.
34
+
35
+ Redistribution and use in source and binary forms, with or without
36
+ modification, are permitted provided that the following conditions are met:
37
+
38
+ 1. Redistributions of source code must retain the above copyright notice, this
39
+ list of conditions and the following disclaimer.
40
+
41
+ 2. Redistributions in binary form must reproduce the above copyright notice,
42
+ this list of conditions and the following disclaimer in the documentation
43
+ and/or other materials provided with the distribution.
44
+
45
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
46
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
48
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
49
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
51
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
52
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
53
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
54
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.4
2
+ Name: hkeyecite
3
+ Version: 0.1.0
4
+ Summary: A library for extracting and parsing legal citations from Hong Kong court judgments
5
+ Author: Terracotta
6
+ License: BSD 2-Clause License
7
+
8
+ Copyright (c) 2025, Terracotta
9
+ All rights reserved.
10
+
11
+ Redistribution and use in source and binary forms, with or without
12
+ modification, are permitted provided that the following conditions are met:
13
+
14
+ 1. Redistributions of source code must retain the above copyright notice, this
15
+ list of conditions and the following disclaimer.
16
+
17
+ 2. Redistributions in binary form must reproduce the above copyright notice,
18
+ this list of conditions and the following disclaimer in the documentation
19
+ and/or other materials provided with the distribution.
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+
32
+ ---
33
+
34
+ Portions of this software are derived from eyecite
35
+ (https://github.com/freelawproject/eyecite)
36
+
37
+ Copyright (c) 2020, Free Law Project
38
+ All rights reserved.
39
+
40
+ Redistribution and use in source and binary forms, with or without
41
+ modification, are permitted provided that the following conditions are met:
42
+
43
+ 1. Redistributions of source code must retain the above copyright notice, this
44
+ list of conditions and the following disclaimer.
45
+
46
+ 2. Redistributions in binary form must reproduce the above copyright notice,
47
+ this list of conditions and the following disclaimer in the documentation
48
+ and/or other materials provided with the distribution.
49
+
50
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
53
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
54
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
56
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
58
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
59
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
60
+
61
+ Project-URL: Homepage, https://github.com/terracottalabs/hkeyecite
62
+ Project-URL: Repository, https://github.com/terracottalabs/hkeyecite
63
+ Project-URL: Issues, https://github.com/terracottalabs/hkeyecite/issues
64
+ Keywords: legal,citations,hong kong,law,nlp,courts
65
+ Classifier: Development Status :: 3 - Alpha
66
+ Classifier: Intended Audience :: Developers
67
+ Classifier: Intended Audience :: Legal Industry
68
+ Classifier: License :: OSI Approved :: BSD License
69
+ Classifier: Programming Language :: Python :: 3
70
+ Classifier: Programming Language :: Python :: 3.10
71
+ Classifier: Programming Language :: Python :: 3.11
72
+ Classifier: Programming Language :: Python :: 3.12
73
+ Classifier: Programming Language :: Python :: 3.13
74
+ Classifier: Topic :: Text Processing
75
+ Classifier: Typing :: Typed
76
+ Requires-Python: >=3.10
77
+ Description-Content-Type: text/markdown
78
+ License-File: LICENSE.txt
79
+ Dynamic: license-file
80
+
81
+ # hkeyecite
82
+
83
+ A Python library that automatically finds and extracts legal citations from Hong Kong court judgments.
84
+
85
+ Give it any block of text containing Hong Kong legal references, and it will identify and parse:
86
+
87
+ - **Neutral citations** -- e.g. `[2024] HKCFA 1`
88
+ - **Law report citations** -- e.g. `(2019) 22 HKCFAR 446`
89
+ - **Action numbers** -- e.g. `HCAL 1756/2020`
90
+
91
+ It also extracts case names (e.g. "HKSAR v Harjani") and pinpoint references (e.g. "at [45]") when present.
92
+
93
+ ![Demo](demo.gif)
94
+
95
+ ## Installation
96
+
97
+ ```bash
98
+ pip install hkeyecite
99
+ ```
100
+
101
+ Requires Python 3.10 or later.
102
+
103
+ ## What it recognises
104
+
105
+ ### Neutral citations
106
+
107
+ The format used by the Judiciary since 2018: `[Year] Court Number`
108
+
109
+ | Code | Court |
110
+ |---|---|
111
+ | `HKCFA` | Court of Final Appeal |
112
+ | `HKCA` | Court of Appeal |
113
+ | `HKCFI` | Court of First Instance |
114
+ | `HKDC` | District Court |
115
+ | `HKFC` / `HKFamC` | Family Court |
116
+ | `HKLT` / `HKLdT` | Lands Tribunal |
117
+ | `HKCT` | Competition Tribunal |
118
+ | `HKLBT` / `HKLaT` | Labour Tribunal |
119
+ | `HKSCT` | Small Claims Tribunal |
120
+ | `HKOAT` | Obscene Articles Tribunal |
121
+ | `HKCC` | Coroner's Court |
122
+ | `HKMC` / `HKMagC` | Magistrates' Courts |
123
+ | `CFA` / `CA` / `CFI` | Older alternate codes |
124
+
125
+ ### Law report citations
126
+
127
+ References to published law report series: `(Year) Volume Reporter Page`
128
+
129
+ | Code | Report Series |
130
+ |---|---|
131
+ | `HKCFAR` | Hong Kong Court of Final Appeal Reports |
132
+ | `HKLRD` | Hong Kong Law Reports & Digest |
133
+ | `HKC` | Hong Kong Cases |
134
+ | `HKPLR` | Hong Kong Public Law Reports |
135
+ | `HKLR` | Hong Kong Law Reports (historical) |
136
+ | `HKCLR` | Hong Kong Criminal Law Reports |
137
+ | `HKCLRT` | Hong Kong Chinese Law Reports & Translations |
138
+
139
+ Dotted variations (e.g. `H.K.L.R.D.`, `H.K.C.F.A.R.`) are automatically normalised.
140
+
141
+ ### Action numbers
142
+
143
+ Case filing references: `Prefix Number/Year`
144
+
145
+ Common prefixes include FACV, FACC (Court of Final Appeal), CACV, CACC (Court of Appeal), HCA, HCAL, HCCC (Court of First Instance), DCCJ, DCCC (District Court), and many more.
146
+
147
+ ### Metadata
148
+
149
+ When a citation is preceded by a case name like `HKSAR v Harjani` or `Re Something`, the case name is automatically extracted. Pinpoint references that follow a citation (`at [45]`, `at para 10`, `at p. 5`) are also captured.
150
+
151
+ ## API
152
+
153
+ ### `get_citations(text)`
154
+
155
+ The main function. Returns a list of citations found in the text, sorted by position.
156
+
157
+ ```python
158
+ from hkeyecite import get_citations
159
+
160
+ citations = get_citations(text)
161
+ ```
162
+
163
+ Each citation has:
164
+ - `.matched_text` -- the original text that was matched
165
+ - `.normalized()` -- a standardised form of the citation
166
+ - `.case_name` -- the case name, if one appears before the citation
167
+ - `.pin_cite` -- the pinpoint reference (e.g. paragraph number), if one follows
168
+ - `.start`, `.end` -- character positions in the source text
169
+
170
+ Pass `include_action_numbers=False` to skip action number extraction.
171
+
172
+ ### Convenience functions
173
+
174
+ ```python
175
+ from hkeyecite.find import extract_neutral_citation, extract_reported_citations, extract_action_numbers
176
+
177
+ # Extract only one type
178
+ neutral = extract_neutral_citation(text)
179
+ reported = extract_reported_citations(text)
180
+ actions = extract_action_numbers(text)
181
+ ```
182
+
183
+ ### Court and reporter lookup
184
+
185
+ ```python
186
+ from hkeyecite.courts import get_court_by_code, get_court_by_case_prefix
187
+ from hkeyecite.reporters import get_reporter
188
+
189
+ court = get_court_by_code("HKCFA")
190
+ court.name # "Court of Final Appeal"
191
+ court.name_zh # "終審法院"
192
+
193
+ court = get_court_by_case_prefix("FACV")
194
+ court.code # "HKCFA"
195
+
196
+ reporter = get_reporter("HKCFAR")
197
+ reporter.name # "Hong Kong Court of Final Appeal Reports"
198
+ ```
199
+
200
+ ## Limitations
201
+
202
+ - Only Hong Kong citations are supported -- UK, Australian, and other jurisdictions are not covered
203
+ - Case names in Chinese characters are not extracted
204
+ - Some older or non-standard citation formats may not be recognised
205
+
206
+ ## Contributing
207
+
208
+ Pull requests are welcome. If you find a citation format that isn't recognised or have ideas for improvement, feel free to open an issue or PR.
209
+
210
+ ## Acknowledgments
211
+
212
+ This project is derived from [eyecite](https://github.com/freelawproject/eyecite), a citation extraction library for US legal citations by the Free Law Project. `hkeyecite` adapts eyecite's approach for the Hong Kong jurisdiction, covering HK-specific courts, law reports, action number formats, and neutral citation conventions.
213
+
214
+ ## License
215
+
216
+ BSD 2-Clause License -- see [LICENSE.txt](LICENSE.txt) for details.
@@ -0,0 +1,136 @@
1
+ # hkeyecite
2
+
3
+ A Python library that automatically finds and extracts legal citations from Hong Kong court judgments.
4
+
5
+ Give it any block of text containing Hong Kong legal references, and it will identify and parse:
6
+
7
+ - **Neutral citations** -- e.g. `[2024] HKCFA 1`
8
+ - **Law report citations** -- e.g. `(2019) 22 HKCFAR 446`
9
+ - **Action numbers** -- e.g. `HCAL 1756/2020`
10
+
11
+ It also extracts case names (e.g. "HKSAR v Harjani") and pinpoint references (e.g. "at [45]") when present.
12
+
13
+ ![Demo](demo.gif)
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ pip install hkeyecite
19
+ ```
20
+
21
+ Requires Python 3.10 or later.
22
+
23
+ ## What it recognises
24
+
25
+ ### Neutral citations
26
+
27
+ The format used by the Judiciary since 2018: `[Year] Court Number`
28
+
29
+ | Code | Court |
30
+ |---|---|
31
+ | `HKCFA` | Court of Final Appeal |
32
+ | `HKCA` | Court of Appeal |
33
+ | `HKCFI` | Court of First Instance |
34
+ | `HKDC` | District Court |
35
+ | `HKFC` / `HKFamC` | Family Court |
36
+ | `HKLT` / `HKLdT` | Lands Tribunal |
37
+ | `HKCT` | Competition Tribunal |
38
+ | `HKLBT` / `HKLaT` | Labour Tribunal |
39
+ | `HKSCT` | Small Claims Tribunal |
40
+ | `HKOAT` | Obscene Articles Tribunal |
41
+ | `HKCC` | Coroner's Court |
42
+ | `HKMC` / `HKMagC` | Magistrates' Courts |
43
+ | `CFA` / `CA` / `CFI` | Older alternate codes |
44
+
45
+ ### Law report citations
46
+
47
+ References to published law report series: `(Year) Volume Reporter Page`
48
+
49
+ | Code | Report Series |
50
+ |---|---|
51
+ | `HKCFAR` | Hong Kong Court of Final Appeal Reports |
52
+ | `HKLRD` | Hong Kong Law Reports & Digest |
53
+ | `HKC` | Hong Kong Cases |
54
+ | `HKPLR` | Hong Kong Public Law Reports |
55
+ | `HKLR` | Hong Kong Law Reports (historical) |
56
+ | `HKCLR` | Hong Kong Criminal Law Reports |
57
+ | `HKCLRT` | Hong Kong Chinese Law Reports & Translations |
58
+
59
+ Dotted variations (e.g. `H.K.L.R.D.`, `H.K.C.F.A.R.`) are automatically normalised.
60
+
61
+ ### Action numbers
62
+
63
+ Case filing references: `Prefix Number/Year`
64
+
65
+ Common prefixes include FACV, FACC (Court of Final Appeal), CACV, CACC (Court of Appeal), HCA, HCAL, HCCC (Court of First Instance), DCCJ, DCCC (District Court), and many more.
66
+
67
+ ### Metadata
68
+
69
+ When a citation is preceded by a case name like `HKSAR v Harjani` or `Re Something`, the case name is automatically extracted. Pinpoint references that follow a citation (`at [45]`, `at para 10`, `at p. 5`) are also captured.
70
+
71
+ ## API
72
+
73
+ ### `get_citations(text)`
74
+
75
+ The main function. Returns a list of citations found in the text, sorted by position.
76
+
77
+ ```python
78
+ from hkeyecite import get_citations
79
+
80
+ citations = get_citations(text)
81
+ ```
82
+
83
+ Each citation has:
84
+ - `.matched_text` -- the original text that was matched
85
+ - `.normalized()` -- a standardised form of the citation
86
+ - `.case_name` -- the case name, if one appears before the citation
87
+ - `.pin_cite` -- the pinpoint reference (e.g. paragraph number), if one follows
88
+ - `.start`, `.end` -- character positions in the source text
89
+
90
+ Pass `include_action_numbers=False` to `get_citations` to skip action number extraction.
91
+
92
+ ### Convenience functions
93
+
94
+ ```python
95
+ from hkeyecite.find import extract_neutral_citation, extract_reported_citations, extract_action_numbers
96
+
97
+ # Extract only one type
98
+ neutral = extract_neutral_citation(text)
99
+ reported = extract_reported_citations(text)
100
+ actions = extract_action_numbers(text)
101
+ ```
102
+
103
+ ### Court and reporter lookup
104
+
105
+ ```python
106
+ from hkeyecite.courts import get_court_by_code, get_court_by_case_prefix
107
+ from hkeyecite.reporters import get_reporter
108
+
109
+ court = get_court_by_code("HKCFA")
110
+ court.name # "Court of Final Appeal"
111
+ court.name_zh # "終審法院"
112
+
113
+ court = get_court_by_case_prefix("FACV")
114
+ court.code # "HKCFA"
115
+
116
+ reporter = get_reporter("HKCFAR")
117
+ reporter.name # "Hong Kong Court of Final Appeal Reports"
118
+ ```
119
+
120
+ ## Limitations
121
+
122
+ - Only Hong Kong citations are supported -- UK, Australian, and other jurisdictions are not covered
123
+ - Case names in Chinese characters are not extracted
124
+ - Some older or non-standard citation formats may not be recognised
125
+
126
+ ## Contributing
127
+
128
+ Pull requests are welcome. If you find a citation format that isn't recognised or have ideas for improvement, feel free to open an issue or PR.
129
+
130
+ ## Acknowledgments
131
+
132
+ This project is derived from [eyecite](https://github.com/freelawproject/eyecite), a citation extraction library for US legal citations by the Free Law Project. `hkeyecite` adapts eyecite's approach for the Hong Kong jurisdiction, covering HK-specific courts, law reports, action number formats, and neutral citation conventions.
133
+
134
+ ## License
135
+
136
+ BSD 2-Clause License -- see [LICENSE.txt](LICENSE.txt) for details.
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.4
2
+ Name: hkeyecite
3
+ Version: 0.1.0
4
+ Summary: A library for extracting and parsing legal citations from Hong Kong court judgments
5
+ Author: Terracotta
6
+ License: BSD 2-Clause License
7
+
8
+ Copyright (c) 2025, Terracotta
9
+ All rights reserved.
10
+
11
+ Redistribution and use in source and binary forms, with or without
12
+ modification, are permitted provided that the following conditions are met:
13
+
14
+ 1. Redistributions of source code must retain the above copyright notice, this
15
+ list of conditions and the following disclaimer.
16
+
17
+ 2. Redistributions in binary form must reproduce the above copyright notice,
18
+ this list of conditions and the following disclaimer in the documentation
19
+ and/or other materials provided with the distribution.
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+
32
+ ---
33
+
34
+ Portions of this software are derived from eyecite
35
+ (https://github.com/freelawproject/eyecite)
36
+
37
+ Copyright (c) 2020, Free Law Project
38
+ All rights reserved.
39
+
40
+ Redistribution and use in source and binary forms, with or without
41
+ modification, are permitted provided that the following conditions are met:
42
+
43
+ 1. Redistributions of source code must retain the above copyright notice, this
44
+ list of conditions and the following disclaimer.
45
+
46
+ 2. Redistributions in binary form must reproduce the above copyright notice,
47
+ this list of conditions and the following disclaimer in the documentation
48
+ and/or other materials provided with the distribution.
49
+
50
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
53
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
54
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
56
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
58
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
59
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
60
+
61
+ Project-URL: Homepage, https://github.com/terracottalabs/hkeyecite
62
+ Project-URL: Repository, https://github.com/terracottalabs/hkeyecite
63
+ Project-URL: Issues, https://github.com/terracottalabs/hkeyecite/issues
64
+ Keywords: legal,citations,hong kong,law,nlp,courts
65
+ Classifier: Development Status :: 3 - Alpha
66
+ Classifier: Intended Audience :: Developers
67
+ Classifier: Intended Audience :: Legal Industry
68
+ Classifier: License :: OSI Approved :: BSD License
69
+ Classifier: Programming Language :: Python :: 3
70
+ Classifier: Programming Language :: Python :: 3.10
71
+ Classifier: Programming Language :: Python :: 3.11
72
+ Classifier: Programming Language :: Python :: 3.12
73
+ Classifier: Programming Language :: Python :: 3.13
74
+ Classifier: Topic :: Text Processing
75
+ Classifier: Typing :: Typed
76
+ Requires-Python: >=3.10
77
+ Description-Content-Type: text/markdown
78
+ License-File: LICENSE.txt
79
+ Dynamic: license-file
80
+
81
+ # hkeyecite
82
+
83
+ A Python library that automatically finds and extracts legal citations from Hong Kong court judgments.
84
+
85
+ Give it any block of text containing Hong Kong legal references, and it will identify and parse:
86
+
87
+ - **Neutral citations** -- e.g. `[2024] HKCFA 1`
88
+ - **Law report citations** -- e.g. `(2019) 22 HKCFAR 446`
89
+ - **Action numbers** -- e.g. `HCAL 1756/2020`
90
+
91
+ It also extracts case names (e.g. "HKSAR v Harjani") and pinpoint references (e.g. "at [45]") when present.
92
+
93
+ ![Demo](demo.gif)
94
+
95
+ ## Installation
96
+
97
+ ```bash
98
+ pip install hkeyecite
99
+ ```
100
+
101
+ Requires Python 3.10 or later.
102
+
103
+ ## What it recognises
104
+
105
+ ### Neutral citations
106
+
107
+ The format used by the Judiciary since 2018: `[Year] Court Number`
108
+
109
+ | Code | Court |
110
+ |---|---|
111
+ | `HKCFA` | Court of Final Appeal |
112
+ | `HKCA` | Court of Appeal |
113
+ | `HKCFI` | Court of First Instance |
114
+ | `HKDC` | District Court |
115
+ | `HKFC` / `HKFamC` | Family Court |
116
+ | `HKLT` / `HKLdT` | Lands Tribunal |
117
+ | `HKCT` | Competition Tribunal |
118
+ | `HKLBT` / `HKLaT` | Labour Tribunal |
119
+ | `HKSCT` | Small Claims Tribunal |
120
+ | `HKOAT` | Obscene Articles Tribunal |
121
+ | `HKCC` | Coroner's Court |
122
+ | `HKMC` / `HKMagC` | Magistrates' Courts |
123
+ | `CFA` / `CA` / `CFI` | Older alternate codes |
124
+
125
+ ### Law report citations
126
+
127
+ References to published law report series: `(Year) Volume Reporter Page`
128
+
129
+ | Code | Report Series |
130
+ |---|---|
131
+ | `HKCFAR` | Hong Kong Court of Final Appeal Reports |
132
+ | `HKLRD` | Hong Kong Law Reports & Digest |
133
+ | `HKC` | Hong Kong Cases |
134
+ | `HKPLR` | Hong Kong Public Law Reports |
135
+ | `HKLR` | Hong Kong Law Reports (historical) |
136
+ | `HKCLR` | Hong Kong Criminal Law Reports |
137
+ | `HKCLRT` | Hong Kong Chinese Law Reports & Translations |
138
+
139
+ Dotted variations (e.g. `H.K.L.R.D.`, `H.K.C.F.A.R.`) are automatically normalised.
140
+
141
+ ### Action numbers
142
+
143
+ Case filing references: `Prefix Number/Year`
144
+
145
+ Common prefixes include FACV, FACC (Court of Final Appeal), CACV, CACC (Court of Appeal), HCA, HCAL, HCCC (Court of First Instance), DCCJ, DCCC (District Court), and many more.
146
+
147
+ ### Metadata
148
+
149
+ When a citation is preceded by a case name like `HKSAR v Harjani` or `Re Something`, the case name is automatically extracted. Pinpoint references that follow a citation (`at [45]`, `at para 10`, `at p. 5`) are also captured.
150
+
151
+ ## API
152
+
153
+ ### `get_citations(text)`
154
+
155
+ The main function. Returns a list of citations found in the text, sorted by position.
156
+
157
+ ```python
158
+ from hkeyecite import get_citations
159
+
160
+ citations = get_citations(text)
161
+ ```
162
+
163
+ Each citation has:
164
+ - `.matched_text` -- the original text that was matched
165
+ - `.normalized()` -- a standardised form of the citation
166
+ - `.case_name` -- the case name, if one appears before the citation
167
+ - `.pin_cite` -- the pinpoint reference (e.g. paragraph number), if one follows
168
+ - `.start`, `.end` -- character positions in the source text
169
+
170
+ Pass `include_action_numbers=False` to skip action number extraction.
171
+
172
+ ### Convenience functions
173
+
174
+ ```python
175
+ from hkeyecite.find import extract_neutral_citation, extract_reported_citations, extract_action_numbers
176
+
177
+ # Extract only one type
178
+ neutral = extract_neutral_citation(text)
179
+ reported = extract_reported_citations(text)
180
+ actions = extract_action_numbers(text)
181
+ ```
182
+
183
+ ### Court and reporter lookup
184
+
185
+ ```python
186
+ from hkeyecite.courts import get_court_by_code, get_court_by_case_prefix
187
+ from hkeyecite.reporters import get_reporter
188
+
189
+ court = get_court_by_code("HKCFA")
190
+ court.name # "Court of Final Appeal"
191
+ court.name_zh # "終審法院"
192
+
193
+ court = get_court_by_case_prefix("FACV")
194
+ court.code # "HKCFA"
195
+
196
+ reporter = get_reporter("HKCFAR")
197
+ reporter.name # "Hong Kong Court of Final Appeal Reports"
198
+ ```
199
+
200
+ ## Limitations
201
+
202
+ - Only Hong Kong citations are supported -- UK, Australian, and other jurisdictions are not covered
203
+ - Case names in Chinese characters are not extracted
204
+ - Some older or non-standard citation formats may not be recognised
205
+
206
+ ## Contributing
207
+
208
+ Pull requests are welcome. If you find a citation format that isn't recognised or have ideas for improvement, feel free to open an issue or PR.
209
+
210
+ ## Acknowledgments
211
+
212
+ This project is derived from [eyecite](https://github.com/freelawproject/eyecite), a citation extraction library for US legal citations by the Free Law Project. `hkeyecite` adapts eyecite's approach for the Hong Kong jurisdiction, covering HK-specific courts, law reports, action number formats, and neutral citation conventions.
213
+
214
+ ## License
215
+
216
+ BSD 2-Clause License -- see [LICENSE.txt](LICENSE.txt) for details.
@@ -0,0 +1,17 @@
1
+ LICENSE.txt
2
+ README.md
3
+ pyproject.toml
4
+ hkeyecite.egg-info/PKG-INFO
5
+ hkeyecite.egg-info/SOURCES.txt
6
+ hkeyecite.egg-info/dependency_links.txt
7
+ hkeyecite.egg-info/top_level.txt
8
+ src/__init__.py
9
+ src/courts.py
10
+ src/find.py
11
+ src/models.py
12
+ src/py.typed
13
+ src/regexes.py
14
+ src/reporters.py
15
+ src/tokenizers.py
16
+ tests/test_eval.py
17
+ tests/test_find.py
@@ -0,0 +1 @@
1
+ hkeyecite
@@ -0,0 +1,43 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hkeyecite"
7
+ version = "0.1.0"
8
+ description = "A library for extracting and parsing legal citations from Hong Kong court judgments"
9
+ readme = "README.md"
10
+ license = {file = "LICENSE.txt"}
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Terracotta" },
14
+ ]
15
+ keywords = ["legal", "citations", "hong kong", "law", "nlp", "courts"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Intended Audience :: Legal Industry",
20
+ "License :: OSI Approved :: BSD License",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Text Processing",
27
+ "Typing :: Typed",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/terracottalabs/hkeyecite"
32
+ Repository = "https://github.com/terracottalabs/hkeyecite"
33
+ Issues = "https://github.com/terracottalabs/hkeyecite/issues"
34
+
35
+ [tool.setuptools]
36
+ package-dir = {"hkeyecite" = "src"}
37
+ packages = ["hkeyecite"]
38
+
39
+ [tool.setuptools.package-data]
40
+ hkeyecite = ["py.typed"]
41
+
42
+ [tool.setuptools.exclude-package-data]
43
+ "*" = ["demo.gif", "demo.tape"]