barscan 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- barscan-0.2.2/.env.example +7 -0
- barscan-0.2.2/.github/workflows/ci.yml +37 -0
- barscan-0.2.2/.github/workflows/release.yml +72 -0
- barscan-0.2.2/.gitignore +84 -0
- barscan-0.2.2/LICENSE +21 -0
- barscan-0.2.2/PKG-INFO +354 -0
- barscan-0.2.2/README.md +315 -0
- barscan-0.2.2/pyproject.toml +81 -0
- barscan-0.2.2/src/barscan/__init__.py +30 -0
- barscan-0.2.2/src/barscan/__main__.py +6 -0
- barscan-0.2.2/src/barscan/analyzer/__init__.py +71 -0
- barscan-0.2.2/src/barscan/analyzer/context.py +172 -0
- barscan-0.2.2/src/barscan/analyzer/filters.py +218 -0
- barscan-0.2.2/src/barscan/analyzer/frequency.py +260 -0
- barscan-0.2.2/src/barscan/analyzer/models.py +186 -0
- barscan-0.2.2/src/barscan/analyzer/nltk_resources.py +80 -0
- barscan-0.2.2/src/barscan/analyzer/pos.py +134 -0
- barscan-0.2.2/src/barscan/analyzer/processor.py +341 -0
- barscan-0.2.2/src/barscan/analyzer/sentiment.py +110 -0
- barscan-0.2.2/src/barscan/analyzer/slang.py +240 -0
- barscan-0.2.2/src/barscan/analyzer/stopwords_ja.py +23 -0
- barscan-0.2.2/src/barscan/analyzer/tfidf.py +146 -0
- barscan-0.2.2/src/barscan/analyzer/tokenizer.py +282 -0
- barscan-0.2.2/src/barscan/cli.py +528 -0
- barscan-0.2.2/src/barscan/config.py +57 -0
- barscan-0.2.2/src/barscan/exceptions.py +91 -0
- barscan-0.2.2/src/barscan/genius/__init__.py +15 -0
- barscan-0.2.2/src/barscan/genius/cache.py +195 -0
- barscan-0.2.2/src/barscan/genius/client.py +364 -0
- barscan-0.2.2/src/barscan/genius/models.py +73 -0
- barscan-0.2.2/src/barscan/logging.py +48 -0
- barscan-0.2.2/src/barscan/output/__init__.py +25 -0
- barscan-0.2.2/src/barscan/output/wordgrain.py +344 -0
- barscan-0.2.2/src/barscan/py.typed +0 -0
- barscan-0.2.2/tests/__init__.py +1 -0
- barscan-0.2.2/tests/test_analyzer/__init__.py +1 -0
- barscan-0.2.2/tests/test_analyzer/conftest.py +66 -0
- barscan-0.2.2/tests/test_analyzer/test_context.py +271 -0
- barscan-0.2.2/tests/test_analyzer/test_filters.py +337 -0
- barscan-0.2.2/tests/test_analyzer/test_frequency.py +475 -0
- barscan-0.2.2/tests/test_analyzer/test_models.py +222 -0
- barscan-0.2.2/tests/test_analyzer/test_pos.py +95 -0
- barscan-0.2.2/tests/test_analyzer/test_processor.py +456 -0
- barscan-0.2.2/tests/test_analyzer/test_sentiment.py +119 -0
- barscan-0.2.2/tests/test_analyzer/test_slang.py +139 -0
- barscan-0.2.2/tests/test_analyzer/test_tfidf.py +178 -0
- barscan-0.2.2/tests/test_analyzer/test_tokenizer.py +457 -0
- barscan-0.2.2/tests/test_cli/__init__.py +1 -0
- barscan-0.2.2/tests/test_cli/conftest.py +90 -0
- barscan-0.2.2/tests/test_cli/test_commands.py +654 -0
- barscan-0.2.2/tests/test_genius/__init__.py +1 -0
- barscan-0.2.2/tests/test_genius/conftest.py +81 -0
- barscan-0.2.2/tests/test_genius/test_cache.py +463 -0
- barscan-0.2.2/tests/test_genius/test_client.py +553 -0
- barscan-0.2.2/tests/test_genius/test_models.py +181 -0
- barscan-0.2.2/tests/test_main.py +34 -0
- barscan-0.2.2/tests/test_output/__init__.py +1 -0
- barscan-0.2.2/tests/test_output/test_wordgrain.py +496 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: pip install -e ".[dev]"
|
|
26
|
+
|
|
27
|
+
- name: Lint
|
|
28
|
+
run: ruff check src/
|
|
29
|
+
|
|
30
|
+
- name: Format check
|
|
31
|
+
run: ruff format --check src/
|
|
32
|
+
|
|
33
|
+
- name: Type check
|
|
34
|
+
run: mypy src/barscan/ --ignore-missing-imports
|
|
35
|
+
|
|
36
|
+
- name: Test
|
|
37
|
+
run: pytest
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
python-version: ["3.11", "3.12"]
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
|
|
23
|
+
- name: Install dependencies
|
|
24
|
+
run: pip install -e ".[dev]"
|
|
25
|
+
|
|
26
|
+
- name: Lint
|
|
27
|
+
run: ruff check src/
|
|
28
|
+
|
|
29
|
+
- name: Format check
|
|
30
|
+
run: ruff format --check src/
|
|
31
|
+
|
|
32
|
+
- name: Type check
|
|
33
|
+
run: mypy src/barscan/ --ignore-missing-imports
|
|
34
|
+
|
|
35
|
+
- name: Test
|
|
36
|
+
run: pytest
|
|
37
|
+
|
|
38
|
+
release:
|
|
39
|
+
needs: test
|
|
40
|
+
runs-on: ubuntu-latest
|
|
41
|
+
permissions:
|
|
42
|
+
contents: write
|
|
43
|
+
id-token: write
|
|
44
|
+
environment:
|
|
45
|
+
name: pypi
|
|
46
|
+
url: https://pypi.org/p/barscan
|
|
47
|
+
|
|
48
|
+
steps:
|
|
49
|
+
- uses: actions/checkout@v4
|
|
50
|
+
|
|
51
|
+
- name: Set up Python
|
|
52
|
+
uses: actions/setup-python@v5
|
|
53
|
+
with:
|
|
54
|
+
python-version: "3.12"
|
|
55
|
+
|
|
56
|
+
- name: Install build dependencies
|
|
57
|
+
run: pip install build
|
|
58
|
+
|
|
59
|
+
- name: Build package
|
|
60
|
+
run: python -m build
|
|
61
|
+
|
|
62
|
+
- name: Create GitHub Release
|
|
63
|
+
env:
|
|
64
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
65
|
+
run: |
|
|
66
|
+
gh release create ${{ github.ref_name }} \
|
|
67
|
+
--title "Release ${{ github.ref_name }}" \
|
|
68
|
+
--generate-notes \
|
|
69
|
+
dist/*
|
|
70
|
+
|
|
71
|
+
- name: Publish to PyPI
|
|
72
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
barscan-0.2.2/.gitignore
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
*.egg-info/
|
|
24
|
+
.installed.cfg
|
|
25
|
+
*.egg
|
|
26
|
+
|
|
27
|
+
# PyInstaller
|
|
28
|
+
*.manifest
|
|
29
|
+
*.spec
|
|
30
|
+
|
|
31
|
+
# Installer logs
|
|
32
|
+
pip-log.txt
|
|
33
|
+
pip-delete-this-directory.txt
|
|
34
|
+
|
|
35
|
+
# Unit test / coverage reports
|
|
36
|
+
htmlcov/
|
|
37
|
+
.tox/
|
|
38
|
+
.nox/
|
|
39
|
+
.coverage
|
|
40
|
+
.coverage.*
|
|
41
|
+
.cache
|
|
42
|
+
nosetests.xml
|
|
43
|
+
coverage.xml
|
|
44
|
+
*.cover
|
|
45
|
+
*.py,cover
|
|
46
|
+
.hypothesis/
|
|
47
|
+
.pytest_cache/
|
|
48
|
+
|
|
49
|
+
# Translations
|
|
50
|
+
*.mo
|
|
51
|
+
*.pot
|
|
52
|
+
|
|
53
|
+
# Environments
|
|
54
|
+
.env
|
|
55
|
+
.venv
|
|
56
|
+
env/
|
|
57
|
+
venv/
|
|
58
|
+
ENV/
|
|
59
|
+
env.bak/
|
|
60
|
+
venv.bak/
|
|
61
|
+
|
|
62
|
+
# mypy
|
|
63
|
+
.mypy_cache/
|
|
64
|
+
.dmypy.json
|
|
65
|
+
dmypy.json
|
|
66
|
+
|
|
67
|
+
# ruff
|
|
68
|
+
.ruff_cache/
|
|
69
|
+
|
|
70
|
+
# IDE
|
|
71
|
+
.idea/
|
|
72
|
+
.vscode/
|
|
73
|
+
*.swp
|
|
74
|
+
*.swo
|
|
75
|
+
*~
|
|
76
|
+
|
|
77
|
+
# OS
|
|
78
|
+
.DS_Store
|
|
79
|
+
Thumbs.db
|
|
80
|
+
|
|
81
|
+
# Project specific
|
|
82
|
+
.cache/
|
|
83
|
+
*.log
|
|
84
|
+
CLAUDE.md
|
barscan-0.2.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 shimpeiws
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
barscan-0.2.2/PKG-INFO
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: barscan
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Lyrics word frequency analyzer using Genius API
|
|
5
|
+
Project-URL: Homepage, https://github.com/shimpeiws/barscan
|
|
6
|
+
Project-URL: Repository, https://github.com/shimpeiws/barscan.git
|
|
7
|
+
Project-URL: Issues, https://github.com/shimpeiws/barscan/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/shimpeiws/barscan/releases
|
|
9
|
+
Project-URL: WordGrain Schema, https://github.com/shimpeiws/word-grain
|
|
10
|
+
Author-email: shimpeiws <shimpeiws@gmail.com>
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: cli,genius,lyrics,nlp,word-frequency
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Environment :: Console
|
|
16
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: lyricsgenius>=3.0.1
|
|
23
|
+
Requires-Dist: nltk>=3.9.0
|
|
24
|
+
Requires-Dist: pydantic-settings>=2.1.0
|
|
25
|
+
Requires-Dist: pydantic>=2.5.0
|
|
26
|
+
Requires-Dist: rich>=13.7.0
|
|
27
|
+
Requires-Dist: typer[all]>=0.15.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: mypy>=1.8.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest-mock>=3.12.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: types-requests>=2.31.0; extra == 'dev'
|
|
35
|
+
Provides-Extra: japanese
|
|
36
|
+
Requires-Dist: janome>=0.5.0; extra == 'japanese'
|
|
37
|
+
Requires-Dist: stopwordsiso>=0.6.1; extra == 'japanese'
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
|
|
40
|
+
# BarScan
|
|
41
|
+
|
|
42
|
+
A Python CLI tool that analyzes word frequency in song lyrics using the Genius API.
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- Fetch lyrics for any artist from the Genius API
|
|
47
|
+
- Analyze word frequency across multiple songs
|
|
48
|
+
- Natural language processing with NLTK for accurate tokenization
|
|
49
|
+
- Customizable stop word filtering and exclusions
|
|
50
|
+
- Multiple output formats: table, JSON, CSV, and WordGrain
|
|
51
|
+
- Local caching to reduce API calls and improve performance
|
|
52
|
+
- Retry logic with exponential backoff for robust API communication
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
### Prerequisites
|
|
57
|
+
|
|
58
|
+
- Python 3.11 or higher
|
|
59
|
+
- pip (latest version recommended)
|
|
60
|
+
|
|
61
|
+
### From PyPI
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install barscan
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### From Source
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
git clone https://github.com/shimpeiws/barscan.git
|
|
71
|
+
cd barscan
|
|
72
|
+
pip install -e ".[dev]"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Setup
|
|
76
|
+
|
|
77
|
+
### Getting a Genius API Token
|
|
78
|
+
|
|
79
|
+
1. Go to [Genius API Clients](https://genius.com/api-clients)
|
|
80
|
+
2. Sign in with your Genius account (or create one)
|
|
81
|
+
3. Click "Create an API Client"
|
|
82
|
+
4. Fill in the app details:
|
|
83
|
+
- App Name: Any name (e.g., "BarScan CLI")
|
|
84
|
+
- App Website URL: Any URL (e.g., your GitHub profile)
|
|
85
|
+
- Redirect URI: Leave default or use `http://localhost`
|
|
86
|
+
5. Click "Save"
|
|
87
|
+
6. Copy the "Client Access Token" (not the Client ID or Secret)
|
|
88
|
+
|
|
89
|
+
### Configuring the Token
|
|
90
|
+
|
|
91
|
+
Set the token as an environment variable:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
export BARSCAN_GENIUS_ACCESS_TOKEN=your_token_here
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Or create a `.env` file in your project directory:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
BARSCAN_GENIUS_ACCESS_TOKEN=your_token_here
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Usage
|
|
104
|
+
|
|
105
|
+
### Basic Analysis
|
|
106
|
+
|
|
107
|
+
Analyze the most common words in an artist's lyrics:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
barscan analyze "Kendrick Lamar"
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Command Options
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
# Analyze more songs
|
|
117
|
+
barscan analyze "Drake" --max-songs 20
|
|
118
|
+
|
|
119
|
+
# Show more words in results
|
|
120
|
+
barscan analyze "J. Cole" --top 100
|
|
121
|
+
|
|
122
|
+
# Combine options
|
|
123
|
+
barscan analyze "Tyler, The Creator" -n 15 -t 50
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Output Formats
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Default table format (console)
|
|
130
|
+
barscan analyze "Beyonce"
|
|
131
|
+
|
|
132
|
+
# JSON format
|
|
133
|
+
barscan analyze "Beyonce" --format json
|
|
134
|
+
|
|
135
|
+
# CSV format
|
|
136
|
+
barscan analyze "Beyonce" --format csv
|
|
137
|
+
|
|
138
|
+
# WordGrain format (structured JSON schema)
|
|
139
|
+
barscan analyze "Beyonce" --format wordgrain
|
|
140
|
+
|
|
141
|
+
# Save to file
|
|
142
|
+
barscan analyze "Beyonce" --format json --output results.json
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Filtering Options
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
# Disable stop word filtering (include "the", "a", "is", etc.)
|
|
149
|
+
barscan analyze "Eminem" --no-stop-words
|
|
150
|
+
|
|
151
|
+
# Exclude specific words
|
|
152
|
+
barscan analyze "Eminem" --exclude "yeah" --exclude "oh"
|
|
153
|
+
|
|
154
|
+
# Combine exclusions
|
|
155
|
+
barscan analyze "Eminem" -e "uh" -e "like" -e "yo"
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Cache Management
|
|
159
|
+
|
|
160
|
+
BarScan caches lyrics locally to reduce API calls:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
# Clear all cached lyrics
|
|
164
|
+
barscan clear-cache --force
|
|
165
|
+
|
|
166
|
+
# Clear only expired cache entries
|
|
167
|
+
barscan clear-cache --expired-only --force
|
|
168
|
+
|
|
169
|
+
# Interactive confirmation (without --force)
|
|
170
|
+
barscan clear-cache
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### View Configuration
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
# Show current configuration and cache statistics
|
|
177
|
+
barscan config
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Configuration Options
|
|
181
|
+
|
|
182
|
+
All settings can be configured via environment variables with the `BARSCAN_` prefix:
|
|
183
|
+
|
|
184
|
+
| Variable | Description | Default |
|
|
185
|
+
|----------|-------------|---------|
|
|
186
|
+
| `BARSCAN_GENIUS_ACCESS_TOKEN` | Genius API access token | (required) |
|
|
187
|
+
| `BARSCAN_CACHE_DIR` | Directory for caching lyrics | `~/.cache/barscan` |
|
|
188
|
+
| `BARSCAN_CACHE_TTL_HOURS` | Cache time-to-live in hours | `168` (7 days) |
|
|
189
|
+
| `BARSCAN_DEFAULT_MAX_SONGS` | Default number of songs to analyze | `10` |
|
|
190
|
+
| `BARSCAN_DEFAULT_TOP_WORDS` | Default number of top words to show | `50` |
|
|
191
|
+
|
|
192
|
+
## Output Formats
|
|
193
|
+
|
|
194
|
+
### Table Format (default)
|
|
195
|
+
|
|
196
|
+
Human-readable table with word rankings:
|
|
197
|
+
|
|
198
|
+
```
|
|
199
|
+
Artist: Kendrick Lamar
|
|
200
|
+
Songs analyzed: 10
|
|
201
|
+
Total words: 5,432
|
|
202
|
+
Unique words: 1,203
|
|
203
|
+
|
|
204
|
+
Word Frequencies
|
|
205
|
+
┌──────┬─────────┬───────┬────────────┐
|
|
206
|
+
│ Rank │ Word │ Count │ Percentage │
|
|
207
|
+
├──────┼─────────┼───────┼────────────┤
|
|
208
|
+
│ 1 │ love │ 87 │ 1.60% │
|
|
209
|
+
│ 2 │ know │ 65 │ 1.20% │
|
|
210
|
+
│ ... │ ... │ ... │ ... │
|
|
211
|
+
└──────┴─────────┴───────┴────────────┘
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### JSON Format
|
|
215
|
+
|
|
216
|
+
Structured JSON for programmatic use:
|
|
217
|
+
|
|
218
|
+
```json
|
|
219
|
+
{
|
|
220
|
+
"artist": "Kendrick Lamar",
|
|
221
|
+
"songs_analyzed": 10,
|
|
222
|
+
"total_words": 5432,
|
|
223
|
+
"unique_words": 1203,
|
|
224
|
+
"frequencies": [
|
|
225
|
+
{"word": "love", "count": 87, "percentage": 1.60},
|
|
226
|
+
{"word": "know", "count": 65, "percentage": 1.20}
|
|
227
|
+
]
|
|
228
|
+
}
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### CSV Format
|
|
232
|
+
|
|
233
|
+
Comma-separated values for spreadsheet import:
|
|
234
|
+
|
|
235
|
+
```csv
|
|
236
|
+
word,count,percentage
|
|
237
|
+
love,87,1.60
|
|
238
|
+
know,65,1.20
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### WordGrain Format
|
|
242
|
+
|
|
243
|
+
[WordGrain](https://github.com/shimpeiws/word-grain) is a standardized JSON schema for vocabulary analysis data. It enables interoperability between different word frequency analysis tools.
|
|
244
|
+
|
|
245
|
+
Output example:
|
|
246
|
+
|
|
247
|
+
```json
|
|
248
|
+
{
|
|
249
|
+
"$schema": "https://raw.githubusercontent.com/shimpeiws/word-grain/main/schema/v0.1.0/wordgrain.schema.json",
|
|
250
|
+
"meta": {
|
|
251
|
+
"source": "genius",
|
|
252
|
+
"artist": "Kendrick Lamar",
|
|
253
|
+
"generated_at": "2024-01-15T10:30:00Z",
|
|
254
|
+
"corpus_size": 10,
|
|
255
|
+
"total_words": 5432,
|
|
256
|
+
"generator": "barscan/0.2.2",
|
|
257
|
+
"language": "en"
|
|
258
|
+
},
|
|
259
|
+
"grains": [
|
|
260
|
+
{"word": "love", "frequency": 87, "frequency_normalized": 160.18}
|
|
261
|
+
]
|
|
262
|
+
}
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## Development
|
|
266
|
+
|
|
267
|
+
### Setup
|
|
268
|
+
|
|
269
|
+
```bash
|
|
270
|
+
# Clone repository
|
|
271
|
+
git clone https://github.com/shimpeiws/barscan.git
|
|
272
|
+
cd barscan
|
|
273
|
+
|
|
274
|
+
# Install with development dependencies
|
|
275
|
+
pip install -e ".[dev]"
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
### Running Tests
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
# Run all tests with coverage
|
|
282
|
+
pytest
|
|
283
|
+
|
|
284
|
+
# Run specific test file
|
|
285
|
+
pytest tests/test_genius/test_client.py -v
|
|
286
|
+
|
|
287
|
+
# Run specific test
|
|
288
|
+
pytest tests/test_genius/test_client.py::TestSearchArtist::test_search_artist_success -v
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Code Quality
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
# Lint code
|
|
295
|
+
ruff check src/
|
|
296
|
+
|
|
297
|
+
# Format code
|
|
298
|
+
ruff format src/
|
|
299
|
+
|
|
300
|
+
# Type check
|
|
301
|
+
mypy src/barscan/ --ignore-missing-imports
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Architecture
|
|
305
|
+
|
|
306
|
+
```
|
|
307
|
+
src/barscan/
|
|
308
|
+
├── cli.py # Typer CLI entry point (barscan command)
|
|
309
|
+
├── config.py # Pydantic Settings configuration
|
|
310
|
+
├── exceptions.py # Exception hierarchy (BarScanError base)
|
|
311
|
+
├── genius/ # Genius API integration
|
|
312
|
+
│ ├── models.py # Pydantic models (Artist, Song, Lyrics)
|
|
313
|
+
│ ├── client.py # GeniusClient with retry logic
|
|
314
|
+
│ └── cache.py # File-based lyrics cache with TTL
|
|
315
|
+
├── analyzer/ # Word frequency analysis
|
|
316
|
+
│ ├── models.py # Analysis result models
|
|
317
|
+
│ ├── processor.py # Text preprocessing with NLTK
|
|
318
|
+
│ ├── filters.py # Stop word and length filtering
|
|
319
|
+
│ └── frequency.py # Word counting and aggregation
|
|
320
|
+
└── output/ # Result formatting
|
|
321
|
+
└── wordgrain.py # WordGrain schema export
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
## Troubleshooting
|
|
325
|
+
|
|
326
|
+
### "Genius API token not configured"
|
|
327
|
+
|
|
328
|
+
Make sure you've set the `BARSCAN_GENIUS_ACCESS_TOKEN` environment variable or created a `.env` file with the token.
|
|
329
|
+
|
|
330
|
+
### "Artist not found"
|
|
331
|
+
|
|
332
|
+
- Check the spelling of the artist name
|
|
333
|
+
- Try using the artist's name exactly as it appears on Genius
|
|
334
|
+
- Some artists may have limited or no presence on Genius
|
|
335
|
+
|
|
336
|
+
### Rate Limiting
|
|
337
|
+
|
|
338
|
+
BarScan includes automatic retry logic with exponential backoff. If you encounter rate limiting:
|
|
339
|
+
|
|
340
|
+
- The tool will automatically retry failed requests
|
|
341
|
+
- Consider reducing `--max-songs` for large analyses
|
|
342
|
+
- Cached lyrics won't trigger new API calls
|
|
343
|
+
|
|
344
|
+
### Empty Results
|
|
345
|
+
|
|
346
|
+
If no words appear in results after filtering:
|
|
347
|
+
|
|
348
|
+
- Try `--no-stop-words` to include common words
|
|
349
|
+
- Check if the artist has lyrics available on Genius
|
|
350
|
+
- Some songs may be instrumental or have no lyrics
|
|
351
|
+
|
|
352
|
+
## License
|
|
353
|
+
|
|
354
|
+
MIT
|